From f0c12fbdce04d09bf13b141d549e5e385c64caad Mon Sep 17 00:00:00 2001 From: Hind-M Date: Tue, 1 Jun 2021 18:39:31 +0200 Subject: First version allowing to fetch remote datasets --- src/python/CMakeLists.txt | 2 + src/python/gudhi/datasets/remote.py | 85 +++++++++++++++++++++++++++++++++ src/python/test/test_remote_datasets.py | 22 +++++++++ 3 files changed, 109 insertions(+) create mode 100644 src/python/gudhi/datasets/remote.py create mode 100644 src/python/test/test_remote_datasets.py diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index 98f2b85f..6f117588 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -542,6 +542,8 @@ if(PYTHONINTERP_FOUND) add_gudhi_py_test(test_dtm_rips_complex) endif() + # Fetch remote datasets + add_gudhi_py_test(test_remote_datasets) # Set missing or not modules set(GUDHI_MODULES ${GUDHI_MODULES} "python" CACHE INTERNAL "GUDHI_MODULES") diff --git a/src/python/gudhi/datasets/remote.py b/src/python/gudhi/datasets/remote.py new file mode 100644 index 00000000..27076785 --- /dev/null +++ b/src/python/gudhi/datasets/remote.py @@ -0,0 +1,85 @@ +# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. +# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. +# Author(s): Hind Montassif +# +# Copyright (C) 2021 Inria +# +# Modification(s): +# - YYYY/MM Author: Description of the modification + +import hashlib + +from os.path import join, exists +from os import makedirs + +from urllib.request import urlretrieve + + +def _checksum_sha256(file_path): + """ + Compute the file checksum using sha256 + + Parameters + ---------- + file_path: string + Full path of the created file. 
+ + Returns + ------- + The hex digest of file_path + """ + sha256_hash = hashlib.sha256() + chunk_size = 4096 + with open(file_path,"rb") as f: + # Read and update hash string value in blocks of 4K + while True: + buffer = f.read(chunk_size) + if not buffer: + break + sha256_hash.update(buffer) + return sha256_hash.hexdigest() + +def fetch(url, filename, dirname = "remote_datasets", checksum_flag = False, file_checksum = None): + """ + Fetch the wanted dataset from the given url and save it in file_path + + Parameters + ---------- + url : string + The url to fetch the dataset from + filename : string + The filename to download + dirname : string + The directory to save the file to. + checksum_flag : boolean + To set if the user wants the file checksum. Default is 'False'. + Note that if checksum_flag is set to 'True', the file_checksum must be provided. + file_checksum : string + The file checksum using sha256 to check against the one computed on the downloaded file. + To be considered, checksum_flag must be set to 'True'. + Default is 'None'. + + Returns + ------- + file_path: string + Full path of the created file. + """ + if not exists(dirname): + makedirs(dirname) + + file_path = join(dirname, filename) + + urlretrieve(url, file_path) + + if (checksum_flag): + if file_checksum is None: + raise ValueError("The file checksum must be provided - different from None - for the check to be performed.") + + checksum = _checksum_sha256(file_path) + if file_checksum != checksum: + raise IOError("{} has a SHA256 checksum : {}, " + "different from expected : {}." 
+ "The file may be corrupted or the given url may be wrong !".format(file_path, checksum, + file_checksum)) + + return file_path diff --git a/src/python/test/test_remote_datasets.py b/src/python/test/test_remote_datasets.py new file mode 100644 index 00000000..c4e752a7 --- /dev/null +++ b/src/python/test/test_remote_datasets.py @@ -0,0 +1,22 @@ +# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. +# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. +# Author(s): Hind Montassif +# +# Copyright (C) 2021 Inria +# +# Modification(s): +# - YYYY/MM Author: Description of the modification + + +from gudhi.datasets import remote + +def test_fetch_remote_datasets(): + # Test files download from given urls + assert 'remote_datasets/spiral_2d.csv' == remote.fetch("https://raw.githubusercontent.com/Hind-M/gudhi-data/main/spiral_2d.csv", "spiral_2d.csv") + assert 'remote_datasets/sphere3D_pts_on_grid.off' == remote.fetch("https://raw.githubusercontent.com/Hind-M/gudhi-data/main/sphere3D_pts_on_grid.off", "sphere3D_pts_on_grid.off") + + # Test files download with checksums provided + assert 'remote_datasets/spiral_2d.csv' == remote.fetch("https://raw.githubusercontent.com/Hind-M/gudhi-data/main/spiral_2d.csv", "spiral_2d.csv", checksum_flag = True, + file_checksum = '37530355d980d957c4ec06b18c775f90a91e446107d06c6201c9b4000b077f38') + assert 'remote_datasets/sphere3D_pts_on_grid.off' == remote.fetch("https://raw.githubusercontent.com/Hind-M/gudhi-data/main/sphere3D_pts_on_grid.off", "sphere3D_pts_on_grid.off", + checksum_flag = True, file_checksum = '32f96d2cafb1177f0dd5e0a019b6ff5658e14a619a7815ae55ad0fc5e8bd3f88') -- cgit v1.2.3 From c2f0cf79af04ea3586a70c0a121a200353e989ac Mon Sep 17 00:00:00 2001 From: Hind-M Date: Wed, 2 Jun 2021 11:30:09 +0200 Subject: Add wrapping function to fetch spiral_2d.csv directly --- src/python/gudhi/datasets/remote.py | 24 +++++++++++++++++++++--- 
src/python/test/test_remote_datasets.py | 3 +++ 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/src/python/gudhi/datasets/remote.py b/src/python/gudhi/datasets/remote.py index 27076785..4a300b15 100644 --- a/src/python/gudhi/datasets/remote.py +++ b/src/python/gudhi/datasets/remote.py @@ -46,11 +46,11 @@ def fetch(url, filename, dirname = "remote_datasets", checksum_flag = False, fil Parameters ---------- url : string - The url to fetch the dataset from + The url to fetch the dataset from. filename : string - The filename to download + The name to give to downloaded file. dirname : string - The directory to save the file to. + The directory to save the file to. Default is "remote_datasets". checksum_flag : boolean To set if the user wants the file checksum. Default is 'False'. Note that if checksum_flag is set to 'True', the file_checksum must be provided. @@ -83,3 +83,21 @@ def fetch(url, filename, dirname = "remote_datasets", checksum_flag = False, fil file_checksum)) return file_path + +def fetch_spiral_2d(filename = "spiral_2d.csv", dirname = "remote_datasets"): + """ + Fetch spiral_2d.csv remotely + + Parameters + ---------- + filename : string + The name to give to downloaded file. Default is "spiral_2d.csv" + dirname : string + The directory to save the file to. Default is "remote_datasets". + + Returns + ------- + file_path: string + Full path of the created file. 
+ """ + return fetch("https://raw.githubusercontent.com/Hind-M/gudhi-data/main/spiral_2d.csv", filename, dirname, True, '37530355d980d957c4ec06b18c775f90a91e446107d06c6201c9b4000b077f38') diff --git a/src/python/test/test_remote_datasets.py b/src/python/test/test_remote_datasets.py index c4e752a7..dc854e25 100644 --- a/src/python/test/test_remote_datasets.py +++ b/src/python/test/test_remote_datasets.py @@ -20,3 +20,6 @@ def test_fetch_remote_datasets(): file_checksum = '37530355d980d957c4ec06b18c775f90a91e446107d06c6201c9b4000b077f38') assert 'remote_datasets/sphere3D_pts_on_grid.off' == remote.fetch("https://raw.githubusercontent.com/Hind-M/gudhi-data/main/sphere3D_pts_on_grid.off", "sphere3D_pts_on_grid.off", checksum_flag = True, file_checksum = '32f96d2cafb1177f0dd5e0a019b6ff5658e14a619a7815ae55ad0fc5e8bd3f88') + + # Test spiral_2d.csv wrapping function + assert 'remote_datasets/spiral_2d.csv' == remote.fetch_spiral_2d() -- cgit v1.2.3 From baa2e67036dae8ec63321a4d9ff4e913780a8757 Mon Sep 17 00:00:00 2001 From: Hind-M Date: Wed, 2 Jun 2021 16:00:40 +0200 Subject: Modify test to consider both slash and backslash in the returned file path --- src/python/test/test_remote_datasets.py | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/src/python/test/test_remote_datasets.py b/src/python/test/test_remote_datasets.py index dc854e25..a822ebaa 100644 --- a/src/python/test/test_remote_datasets.py +++ b/src/python/test/test_remote_datasets.py @@ -9,17 +9,36 @@ from gudhi.datasets import remote +import re def test_fetch_remote_datasets(): # Test files download from given urls - assert 'remote_datasets/spiral_2d.csv' == remote.fetch("https://raw.githubusercontent.com/Hind-M/gudhi-data/main/spiral_2d.csv", "spiral_2d.csv") - assert 'remote_datasets/sphere3D_pts_on_grid.off' == remote.fetch("https://raw.githubusercontent.com/Hind-M/gudhi-data/main/sphere3D_pts_on_grid.off", "sphere3D_pts_on_grid.off") + path_file_dw = 
remote.fetch("https://raw.githubusercontent.com/Hind-M/gudhi-data/main/spiral_2d.csv", "spiral_2d.csv") + names_dw = re.split(r' |/|\\', path_file_dw) + assert 'remote_datasets' == names_dw[0] + assert 'spiral_2d.csv' == names_dw[1] + + path_file_dw = remote.fetch("https://raw.githubusercontent.com/Hind-M/gudhi-data/main/sphere3D_pts_on_grid.off", "sphere3D_pts_on_grid.off") + names_dw = re.split(r' |/|\\', path_file_dw) + assert 'remote_datasets' == names_dw[0] + assert 'sphere3D_pts_on_grid.off' == names_dw[1] + # Test files download with checksums provided - assert 'remote_datasets/spiral_2d.csv' == remote.fetch("https://raw.githubusercontent.com/Hind-M/gudhi-data/main/spiral_2d.csv", "spiral_2d.csv", checksum_flag = True, + path_file_dw = remote.fetch("https://raw.githubusercontent.com/Hind-M/gudhi-data/main/spiral_2d.csv", "spiral_2d.csv", checksum_flag = True, file_checksum = '37530355d980d957c4ec06b18c775f90a91e446107d06c6201c9b4000b077f38') - assert 'remote_datasets/sphere3D_pts_on_grid.off' == remote.fetch("https://raw.githubusercontent.com/Hind-M/gudhi-data/main/sphere3D_pts_on_grid.off", "sphere3D_pts_on_grid.off", + names_dw = re.split(r' |/|\\', path_file_dw) + assert 'remote_datasets' == names_dw[0] + assert 'spiral_2d.csv' == names_dw[1] + + path_file_dw = remote.fetch("https://raw.githubusercontent.com/Hind-M/gudhi-data/main/sphere3D_pts_on_grid.off", "sphere3D_pts_on_grid.off", checksum_flag = True, file_checksum = '32f96d2cafb1177f0dd5e0a019b6ff5658e14a619a7815ae55ad0fc5e8bd3f88') + names_dw = re.split(r' |/|\\', path_file_dw) + assert 'remote_datasets' == names_dw[0] + assert 'sphere3D_pts_on_grid.off' == names_dw[1] # Test spiral_2d.csv wrapping function - assert 'remote_datasets/spiral_2d.csv' == remote.fetch_spiral_2d() + path_file_dw = remote.fetch_spiral_2d() + names_dw = re.split(r' |/|\\', path_file_dw) + assert 'remote_datasets' == names_dw[0] + assert 'spiral_2d.csv' == names_dw[1] -- cgit v1.2.3 From 
3ee453718eebc7274b19caef4b79d8ec2754d583 Mon Sep 17 00:00:00 2001 From: Hind-M Date: Thu, 3 Jun 2021 16:40:59 +0200 Subject: Modify urls to point to GUDHI/gudhi-data repo --- src/python/gudhi/datasets/remote.py | 3 ++- src/python/test/test_remote_datasets.py | 8 ++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/python/gudhi/datasets/remote.py b/src/python/gudhi/datasets/remote.py index 4a300b15..525a7b66 100644 --- a/src/python/gudhi/datasets/remote.py +++ b/src/python/gudhi/datasets/remote.py @@ -100,4 +100,5 @@ def fetch_spiral_2d(filename = "spiral_2d.csv", dirname = "remote_datasets"): file_path: string Full path of the created file. """ - return fetch("https://raw.githubusercontent.com/Hind-M/gudhi-data/main/spiral_2d.csv", filename, dirname, True, '37530355d980d957c4ec06b18c775f90a91e446107d06c6201c9b4000b077f38') + return fetch("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/spiral_2d.csv", filename, dirname, True, + '37530355d980d957c4ec06b18c775f90a91e446107d06c6201c9b4000b077f38') diff --git a/src/python/test/test_remote_datasets.py b/src/python/test/test_remote_datasets.py index a822ebaa..63ad7885 100644 --- a/src/python/test/test_remote_datasets.py +++ b/src/python/test/test_remote_datasets.py @@ -13,25 +13,25 @@ import re def test_fetch_remote_datasets(): # Test files download from given urls - path_file_dw = remote.fetch("https://raw.githubusercontent.com/Hind-M/gudhi-data/main/spiral_2d.csv", "spiral_2d.csv") + path_file_dw = remote.fetch("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/spiral_2d.csv", "spiral_2d.csv") names_dw = re.split(r' |/|\\', path_file_dw) assert 'remote_datasets' == names_dw[0] assert 'spiral_2d.csv' == names_dw[1] - path_file_dw = remote.fetch("https://raw.githubusercontent.com/Hind-M/gudhi-data/main/sphere3D_pts_on_grid.off", "sphere3D_pts_on_grid.off") + path_file_dw = 
remote.fetch("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/sphere3D_pts_on_grid.off", "sphere3D_pts_on_grid.off") names_dw = re.split(r' |/|\\', path_file_dw) assert 'remote_datasets' == names_dw[0] assert 'sphere3D_pts_on_grid.off' == names_dw[1] # Test files download with checksums provided - path_file_dw = remote.fetch("https://raw.githubusercontent.com/Hind-M/gudhi-data/main/spiral_2d.csv", "spiral_2d.csv", checksum_flag = True, + path_file_dw = remote.fetch("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/spiral_2d.csv", "spiral_2d.csv", checksum_flag = True, file_checksum = '37530355d980d957c4ec06b18c775f90a91e446107d06c6201c9b4000b077f38') names_dw = re.split(r' |/|\\', path_file_dw) assert 'remote_datasets' == names_dw[0] assert 'spiral_2d.csv' == names_dw[1] - path_file_dw = remote.fetch("https://raw.githubusercontent.com/Hind-M/gudhi-data/main/sphere3D_pts_on_grid.off", "sphere3D_pts_on_grid.off", + path_file_dw = remote.fetch("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/sphere3D_pts_on_grid.off", "sphere3D_pts_on_grid.off", checksum_flag = True, file_checksum = '32f96d2cafb1177f0dd5e0a019b6ff5658e14a619a7815ae55ad0fc5e8bd3f88') names_dw = re.split(r' |/|\\', path_file_dw) assert 'remote_datasets' == names_dw[0] -- cgit v1.2.3 From bbe2e25a204be50eb422db71b4cf314b92797d4e Mon Sep 17 00:00:00 2001 From: Hind-M Date: Fri, 4 Jun 2021 12:21:11 +0200 Subject: Remove checksum_flag parameter and use value of 'file_checksum is not None' instead --- src/python/gudhi/datasets/remote.py | 13 +++---------- src/python/test/test_remote_datasets.py | 7 ++++--- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/src/python/gudhi/datasets/remote.py b/src/python/gudhi/datasets/remote.py index 525a7b66..fdd20f74 100644 --- a/src/python/gudhi/datasets/remote.py +++ b/src/python/gudhi/datasets/remote.py @@ -39,7 +39,7 @@ def _checksum_sha256(file_path): sha256_hash.update(buffer) return sha256_hash.hexdigest() 
-def fetch(url, filename, dirname = "remote_datasets", checksum_flag = False, file_checksum = None): +def fetch(url, filename, dirname = "remote_datasets", file_checksum = None): """ Fetch the wanted dataset from the given url and save it in file_path @@ -51,12 +51,8 @@ def fetch(url, filename, dirname = "remote_datasets", checksum_flag = False, fil The name to give to downloaded file. dirname : string The directory to save the file to. Default is "remote_datasets". - checksum_flag : boolean - To set if the user wants the file checksum. Default is 'False'. - Note that if checksum_flag is set to 'True', the file_checksum must be provided. file_checksum : string The file checksum using sha256 to check against the one computed on the downloaded file. - To be considered, checksum_flag must be set to 'True'. Default is 'None'. Returns @@ -71,10 +67,7 @@ def fetch(url, filename, dirname = "remote_datasets", checksum_flag = False, fil urlretrieve(url, file_path) - if (checksum_flag): - if file_checksum is None: - raise ValueError("The file checksum must be provided - different from None - for the check to be performed.") - + if file_checksum is not None: checksum = _checksum_sha256(file_path) if file_checksum != checksum: raise IOError("{} has a SHA256 checksum : {}, " @@ -100,5 +93,5 @@ def fetch_spiral_2d(filename = "spiral_2d.csv", dirname = "remote_datasets"): file_path: string Full path of the created file. 
""" - return fetch("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/spiral_2d.csv", filename, dirname, True, + return fetch("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/spiral_2d.csv", filename, dirname, '37530355d980d957c4ec06b18c775f90a91e446107d06c6201c9b4000b077f38') diff --git a/src/python/test/test_remote_datasets.py b/src/python/test/test_remote_datasets.py index 63ad7885..6c9217c8 100644 --- a/src/python/test/test_remote_datasets.py +++ b/src/python/test/test_remote_datasets.py @@ -25,14 +25,15 @@ def test_fetch_remote_datasets(): # Test files download with checksums provided - path_file_dw = remote.fetch("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/spiral_2d.csv", "spiral_2d.csv", checksum_flag = True, - file_checksum = '37530355d980d957c4ec06b18c775f90a91e446107d06c6201c9b4000b077f38') + path_file_dw = remote.fetch("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/spiral_2d.csv", "spiral_2d.csv", + file_checksum = '37530355d980d957c4ec06b18c775f90a91e446107d06c6201c9b4000b077f38') names_dw = re.split(r' |/|\\', path_file_dw) assert 'remote_datasets' == names_dw[0] assert 'spiral_2d.csv' == names_dw[1] path_file_dw = remote.fetch("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/sphere3D_pts_on_grid.off", "sphere3D_pts_on_grid.off", - checksum_flag = True, file_checksum = '32f96d2cafb1177f0dd5e0a019b6ff5658e14a619a7815ae55ad0fc5e8bd3f88') + file_checksum = '32f96d2cafb1177f0dd5e0a019b6ff5658e14a619a7815ae55ad0fc5e8bd3f88') + names_dw = re.split(r' |/|\\', path_file_dw) assert 'remote_datasets' == names_dw[0] assert 'sphere3D_pts_on_grid.off' == names_dw[1] -- cgit v1.2.3 From f7b4d9f3ed0b0c386204077ea53a22e2dba527fc Mon Sep 17 00:00:00 2001 From: Hind-M Date: Fri, 4 Jun 2021 15:06:57 +0200 Subject: Check if the wanted file already exists locally before downloading --- src/python/gudhi/datasets/remote.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 
deletions(-) diff --git a/src/python/gudhi/datasets/remote.py b/src/python/gudhi/datasets/remote.py index fdd20f74..b266467d 100644 --- a/src/python/gudhi/datasets/remote.py +++ b/src/python/gudhi/datasets/remote.py @@ -60,20 +60,24 @@ def fetch(url, filename, dirname = "remote_datasets", file_checksum = None): file_path: string Full path of the created file. """ - if not exists(dirname): - makedirs(dirname) file_path = join(dirname, filename) - urlretrieve(url, file_path) - - if file_checksum is not None: - checksum = _checksum_sha256(file_path) - if file_checksum != checksum: - raise IOError("{} has a SHA256 checksum : {}, " - "different from expected : {}." - "The file may be corrupted or the given url may be wrong !".format(file_path, checksum, - file_checksum)) + # Check that an existing file does not already exist at file_path + if not exists(file_path): + # Create directory if not existing + if not exists(dirname): + makedirs(dirname) + + urlretrieve(url, file_path) + + if file_checksum is not None: + checksum = _checksum_sha256(file_path) + if file_checksum != checksum: + raise IOError("{} has a SHA256 checksum : {}, " + "different from expected : {}." 
+ "The file may be corrupted or the given url may be wrong !".format(file_path, checksum, + file_checksum)) return file_path -- cgit v1.2.3 From 16867ca9321e50531307253e957b91c4df7e564c Mon Sep 17 00:00:00 2001 From: Hind-M Date: Fri, 4 Jun 2021 16:39:16 +0200 Subject: Verify checksum even for already existing files locally --- src/python/gudhi/datasets/remote.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/python/gudhi/datasets/remote.py b/src/python/gudhi/datasets/remote.py index b266467d..aef4b277 100644 --- a/src/python/gudhi/datasets/remote.py +++ b/src/python/gudhi/datasets/remote.py @@ -63,7 +63,7 @@ def fetch(url, filename, dirname = "remote_datasets", file_checksum = None): file_path = join(dirname, filename) - # Check that an existing file does not already exist at file_path + # Check for an already existing file at file_path if not exists(file_path): # Create directory if not existing if not exists(dirname): @@ -71,13 +71,12 @@ def fetch(url, filename, dirname = "remote_datasets", file_checksum = None): urlretrieve(url, file_path) - if file_checksum is not None: - checksum = _checksum_sha256(file_path) - if file_checksum != checksum: - raise IOError("{} has a SHA256 checksum : {}, " - "different from expected : {}." - "The file may be corrupted or the given url may be wrong !".format(file_path, checksum, - file_checksum)) + if file_checksum is not None: + checksum = _checksum_sha256(file_path) + if file_checksum != checksum: + raise IOError("{} has a SHA256 checksum : {}, " + "different from expected : {}." 
+ "The file may be corrupted or the given url may be wrong !".format(file_path, checksum, file_checksum)) return file_path -- cgit v1.2.3 From 82524c5b0a6ab02b020574b2200a8721f3ed424c Mon Sep 17 00:00:00 2001 From: Hind-M Date: Mon, 7 Jun 2021 15:03:14 +0200 Subject: Add test with wrong checksum Add functions to avoid redundant code --- src/python/test/test_remote_datasets.py | 43 +++++++++++++++++---------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/src/python/test/test_remote_datasets.py b/src/python/test/test_remote_datasets.py index 6c9217c8..e252980d 100644 --- a/src/python/test/test_remote_datasets.py +++ b/src/python/test/test_remote_datasets.py @@ -10,36 +10,37 @@ from gudhi.datasets import remote import re +import os.path +import pytest -def test_fetch_remote_datasets(): - # Test files download from given urls - path_file_dw = remote.fetch("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/spiral_2d.csv", "spiral_2d.csv") - names_dw = re.split(r' |/|\\', path_file_dw) - assert 'remote_datasets' == names_dw[0] - assert 'spiral_2d.csv' == names_dw[1] +def check_dir_file_names(path_file_dw, filename, dirname): + assert os.path.isfile(path_file_dw) - path_file_dw = remote.fetch("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/sphere3D_pts_on_grid.off", "sphere3D_pts_on_grid.off") names_dw = re.split(r' |/|\\', path_file_dw) - assert 'remote_datasets' == names_dw[0] - assert 'sphere3D_pts_on_grid.off' == names_dw[1] + assert dirname == names_dw[0] + assert filename == names_dw[1] +def check_fetch_output(url, filename, dirname = "remote_datasets", file_checksum = None): + path_file_dw = remote.fetch(url, filename, dirname, file_checksum) + check_dir_file_names(path_file_dw, filename, dirname) + +def test_fetch_remote_datasets(): + # Test fetch with a wrong checksum + with pytest.raises(OSError): + check_fetch_output("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/spiral_2d.csv", 
"spiral_2d.csv", file_checksum = 'XXXXXXXXXX') - # Test files download with checksums provided - path_file_dw = remote.fetch("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/spiral_2d.csv", "spiral_2d.csv", + # Test files download from given urls with checksums provided + check_fetch_output("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/spiral_2d.csv", "spiral_2d.csv", file_checksum = '37530355d980d957c4ec06b18c775f90a91e446107d06c6201c9b4000b077f38') - names_dw = re.split(r' |/|\\', path_file_dw) - assert 'remote_datasets' == names_dw[0] - assert 'spiral_2d.csv' == names_dw[1] - path_file_dw = remote.fetch("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/sphere3D_pts_on_grid.off", "sphere3D_pts_on_grid.off", + check_fetch_output("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/sphere3D_pts_on_grid.off", "sphere3D_pts_on_grid.off", file_checksum = '32f96d2cafb1177f0dd5e0a019b6ff5658e14a619a7815ae55ad0fc5e8bd3f88') - names_dw = re.split(r' |/|\\', path_file_dw) - assert 'remote_datasets' == names_dw[0] - assert 'sphere3D_pts_on_grid.off' == names_dw[1] + # Test files download from given urls without checksums + check_fetch_output("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/spiral_2d.csv", "spiral_2d.csv") + + check_fetch_output("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/sphere3D_pts_on_grid.off", "sphere3D_pts_on_grid.off") # Test spiral_2d.csv wrapping function path_file_dw = remote.fetch_spiral_2d() - names_dw = re.split(r' |/|\\', path_file_dw) - assert 'remote_datasets' == names_dw[0] - assert 'spiral_2d.csv' == names_dw[1] + check_dir_file_names(path_file_dw, 'spiral_2d.csv', 'remote_datasets') -- cgit v1.2.3 From 8749199e00c0ed1c32b8e0198a65797de3ad192a Mon Sep 17 00:00:00 2001 From: Hind-M Date: Mon, 27 Sep 2021 15:19:25 +0200 Subject: Add option in cmake to enable or not the inclusion of datasets fetching test (disabled by default) --- 
src/cmake/modules/GUDHI_modules.cmake | 11 ++++++----- src/python/CMakeLists.txt | 4 +++- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/cmake/modules/GUDHI_modules.cmake b/src/cmake/modules/GUDHI_modules.cmake index ccaf1ac5..9cc1a8f5 100644 --- a/src/cmake/modules/GUDHI_modules.cmake +++ b/src/cmake/modules/GUDHI_modules.cmake @@ -17,11 +17,12 @@ function(add_gudhi_module file_path) endfunction(add_gudhi_module) -option(WITH_GUDHI_BENCHMARK "Activate/desactivate benchmark compilation" OFF) -option(WITH_GUDHI_EXAMPLE "Activate/desactivate examples compilation and installation" OFF) -option(WITH_GUDHI_PYTHON "Activate/desactivate python module compilation and installation" ON) -option(WITH_GUDHI_TEST "Activate/desactivate examples compilation and installation" ON) -option(WITH_GUDHI_UTILITIES "Activate/desactivate utilities compilation and installation" ON) +option(WITH_GUDHI_BENCHMARK "Activate/deactivate benchmark compilation" OFF) +option(WITH_GUDHI_EXAMPLE "Activate/deactivate examples compilation and installation" OFF) +option(WITH_NETWORK "Activate/deactivate datasets fetching test which uses the Internet" OFF) +option(WITH_GUDHI_PYTHON "Activate/deactivate python module compilation and installation" ON) +option(WITH_GUDHI_TEST "Activate/deactivate examples compilation and installation" ON) +option(WITH_GUDHI_UTILITIES "Activate/deactivate utilities compilation and installation" ON) if (WITH_GUDHI_BENCHMARK) set(GUDHI_SUB_DIRECTORIES "${GUDHI_SUB_DIRECTORIES};benchmark") diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index 6f117588..6c8dfe32 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -543,7 +543,9 @@ if(PYTHONINTERP_FOUND) endif() # Fetch remote datasets - add_gudhi_py_test(test_remote_datasets) + if(WITH_NETWORK) + add_gudhi_py_test(test_remote_datasets) + endif() # Set missing or not modules set(GUDHI_MODULES ${GUDHI_MODULES} "python" CACHE INTERNAL "GUDHI_MODULES") -- cgit v1.2.3 From 
613db2444a9a12a64b097b944d0180e4fdbff71f Mon Sep 17 00:00:00 2001 From: Hind-M Date: Mon, 27 Sep 2021 17:32:55 +0200 Subject: Document option WITH_NETWORK in installation manual and tests_strategy Enable WITH_NETWORK option in some of the CI platforms (for a minimal testing) --- .appveyor.yml | 5 ++++- .circleci/config.yml | 18 ++++++++++++++++++ .github/for_maintainers/tests_strategy.md | 4 +++- src/common/doc/installation.h | 2 ++ 4 files changed, 27 insertions(+), 2 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index 9ff8f157..b44e08e1 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -29,6 +29,9 @@ environment: - target: Python CMAKE_FLAGS: -DWITH_GUDHI_EXAMPLE=OFF -DWITH_GUDHI_TEST=OFF -DWITH_GUDHI_UTILITIES=OFF -DWITH_GUDHI_PYTHON=ON + - target: PythonTestsWithNetwork + CMAKE_FLAGS: -DWITH_GUDHI_EXAMPLE=OFF -DWITH_GUDHI_TEST=ON -DWITH_NETWORK=ON -DWITH_GUDHI_UTILITIES=OFF -DWITH_GUDHI_PYTHON=ON + cache: - c:\Tools\vcpkg\installed @@ -56,7 +59,7 @@ build_script: - mkdir build - cd build - cmake -G "Visual Studio 15 2017 Win64" %CMAKE_FLAGS% %CMAKE_GMP_FLAGS% %CMAKE_MPFR_FLAGS% %CMAKE_VCPKG_FLAGS% .. - - if [%target%]==[Python] ( + - if [[%target%]==[Python] || [%target%]==[PythonTestsWithNetwork]] ( cd src/python & type setup.py & MSBuild Cython.sln /m /p:Configuration=Release /p:Platform=x64 & diff --git a/.circleci/config.yml b/.circleci/config.yml index f6a875dd..85e42f8a 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -77,6 +77,23 @@ jobs: path: /tmp/htmlcov destination: htmlcov + python_tests_with_network: + docker: + - image: gudhi/ci_for_gudhi:latest + steps: + - checkout + - run: + name: Build and test python module with network + command: | + git submodule init + git submodule update + mkdir build + cd build + cmake -DCMAKE_BUILD_TYPE=Release -DWITH_GUDHI_EXAMPLE=OFF -DWITH_GUDHI_UTILITIES=OFF -DWITH_GUDHI_PYTHON=ON -DPython_ADDITIONAL_VERSIONS=3 -DWITH_GUDHI_TEST=ON -DWITH_NETWORK=ON .. 
+ cd src/python + python3 setup.py build_ext --inplace + ctest --output-on-failure + doxygen: docker: - image: gudhi/ci_for_gudhi:latest @@ -245,4 +262,5 @@ workflows: - tests - utils - python + - python_tests_with_network - doxygen diff --git a/.github/for_maintainers/tests_strategy.md b/.github/for_maintainers/tests_strategy.md index 9c181740..8fd7ac0d 100644 --- a/.github/for_maintainers/tests_strategy.md +++ b/.github/for_maintainers/tests_strategy.md @@ -8,13 +8,14 @@ The aim is to help maintainers to anticipate third parties modifications, update ### Linux -As all the third parties are already installed (thanks to docker), the compilations has been seperated by categories to be parallelized: +As all the third parties are already installed (thanks to docker), the compilations has been separated by categories to be parallelized: * examples (C++) * tests (C++) * utils (C++) * doxygen (C++ documentation that is available in the artefacts) * python (including documentation and code coverage that are available in the artefacts) +* python_tests_with_network (includes previous python with WITH_NETWORK option enabled which adds datasets fetching test) (cf. `.circleci/config.yml`) @@ -45,6 +46,7 @@ The compilations has been seperated by categories to be parallelized, but I don' * tests (C++) * utils (C++) * python +* python tests with network Doxygen (C++) is not tested. (cf. `.appveyor.yml`) diff --git a/src/common/doc/installation.h b/src/common/doc/installation.h index 610aa17e..72d4b1e5 100644 --- a/src/common/doc/installation.h +++ b/src/common/doc/installation.h @@ -40,6 +40,8 @@ make \endverbatim * `make test` is using Ctest (CMake test driver * program). If some of the tests are failing, please send us the result of the following command: * \verbatim ctest --output-on-failure \endverbatim + * Testing fetching datasets feature requires the use of the internet and is disabled by default. 
If you want to include this test, set WITH_NETWORK to ON when building in the previous step (note that this test is included in the python module): + * \verbatim cmake -DCMAKE_BUILD_TYPE=Release -DWITH_GUDHI_TEST=ON -DWITH_NETWORK=ON --DWITH_GUDHI_PYTHON=ON .. \endverbatim * * \subsection documentationgeneration Documentation * To generate the documentation, Doxygen is required. -- cgit v1.2.3 From 3447def563dd7817f7fc1009133685a0ff6ddb43 Mon Sep 17 00:00:00 2001 From: Hind-M Date: Tue, 28 Sep 2021 14:25:10 +0200 Subject: Use 'or' in if instead of '||' in appveyor.yml --- .appveyor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.appveyor.yml b/.appveyor.yml index b44e08e1..ca2ca15c 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -59,7 +59,7 @@ build_script: - mkdir build - cd build - cmake -G "Visual Studio 15 2017 Win64" %CMAKE_FLAGS% %CMAKE_GMP_FLAGS% %CMAKE_MPFR_FLAGS% %CMAKE_VCPKG_FLAGS% .. - - if [[%target%]==[Python] || [%target%]==[PythonTestsWithNetwork]] ( + - if or ([%target%]==[Python]) ([%target%]==[PythonTestsWithNetwork]) ( cd src/python & type setup.py & MSBuild Cython.sln /m /p:Configuration=Release /p:Platform=x64 & -- cgit v1.2.3 From c2bdc8a749449d41ec367089aecd975fe633c121 Mon Sep 17 00:00:00 2001 From: Hind-M Date: Tue, 28 Sep 2021 14:42:35 +0200 Subject: Replace '()' with '{}' in if statement in appveyor.yml --- .appveyor.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index ca2ca15c..521ec42d 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -59,12 +59,12 @@ build_script: - mkdir build - cd build - cmake -G "Visual Studio 15 2017 Win64" %CMAKE_FLAGS% %CMAKE_GMP_FLAGS% %CMAKE_MPFR_FLAGS% %CMAKE_VCPKG_FLAGS% .. 
- - if or ([%target%]==[Python]) ([%target%]==[PythonTestsWithNetwork]) ( + - if or ([%target%]==[Python]) ([%target%]==[PythonTestsWithNetwork]) { cd src/python & type setup.py & MSBuild Cython.sln /m /p:Configuration=Release /p:Platform=x64 & ctest -j 1 --output-on-failure -C Release - ) else ( + } else { MSBuild GUDHIdev.sln /m /p:Configuration=Release /p:Platform=x64 & ctest -j 1 --output-on-failure -C Release -E diff_files - ) + } -- cgit v1.2.3 From c9d6439fb9a6e65d7aa9f18bce675de65e901a0d Mon Sep 17 00:00:00 2001 From: Hind-M Date: Mon, 25 Oct 2021 11:43:09 +0200 Subject: Rename WITH_NETWORK option to WITH_GUDHI_REMOTE_TEST --- .appveyor.yml | 2 +- .circleci/config.yml | 2 +- .github/for_maintainers/tests_strategy.md | 2 +- src/cmake/modules/GUDHI_modules.cmake | 2 +- src/common/doc/installation.h | 4 ++-- src/python/CMakeLists.txt | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index 521ec42d..ee6067e0 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -30,7 +30,7 @@ environment: CMAKE_FLAGS: -DWITH_GUDHI_EXAMPLE=OFF -DWITH_GUDHI_TEST=OFF -DWITH_GUDHI_UTILITIES=OFF -DWITH_GUDHI_PYTHON=ON - target: PythonTestsWithNetwork - CMAKE_FLAGS: -DWITH_GUDHI_EXAMPLE=OFF -DWITH_GUDHI_TEST=ON -DWITH_NETWORK=ON -DWITH_GUDHI_UTILITIES=OFF -DWITH_GUDHI_PYTHON=ON + CMAKE_FLAGS: -DWITH_GUDHI_EXAMPLE=OFF -DWITH_GUDHI_TEST=ON -DWITH_GUDHI_REMOTE_TEST=ON -DWITH_GUDHI_UTILITIES=OFF -DWITH_GUDHI_PYTHON=ON cache: diff --git a/.circleci/config.yml b/.circleci/config.yml index 85e42f8a..262e124b 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -89,7 +89,7 @@ jobs: git submodule update mkdir build cd build - cmake -DCMAKE_BUILD_TYPE=Release -DWITH_GUDHI_EXAMPLE=OFF -DWITH_GUDHI_UTILITIES=OFF -DWITH_GUDHI_PYTHON=ON -DPython_ADDITIONAL_VERSIONS=3 -DWITH_GUDHI_TEST=ON -DWITH_NETWORK=ON .. 
+ cmake -DCMAKE_BUILD_TYPE=Release -DWITH_GUDHI_EXAMPLE=OFF -DWITH_GUDHI_UTILITIES=OFF -DWITH_GUDHI_PYTHON=ON -DPython_ADDITIONAL_VERSIONS=3 -DWITH_GUDHI_TEST=ON -DWITH_GUDHI_REMOTE_TEST=ON .. cd src/python python3 setup.py build_ext --inplace ctest --output-on-failure diff --git a/.github/for_maintainers/tests_strategy.md b/.github/for_maintainers/tests_strategy.md index 8fd7ac0d..610e1749 100644 --- a/.github/for_maintainers/tests_strategy.md +++ b/.github/for_maintainers/tests_strategy.md @@ -15,7 +15,7 @@ As all the third parties are already installed (thanks to docker), the compilati * utils (C++) * doxygen (C++ documentation that is available in the artefacts) * python (including documentation and code coverage that are available in the artefacts) -* python_tests_with_network (includes previous python with WITH_NETWORK option enabled which adds datasets fetching test) +* python_tests_with_network (includes previous python with WITH_GUDHI_REMOTE_TEST option enabled which adds datasets fetching test) (cf. 
`.circleci/config.yml`) diff --git a/src/cmake/modules/GUDHI_modules.cmake b/src/cmake/modules/GUDHI_modules.cmake index 9cc1a8f5..7cdce307 100644 --- a/src/cmake/modules/GUDHI_modules.cmake +++ b/src/cmake/modules/GUDHI_modules.cmake @@ -19,7 +19,7 @@ endfunction(add_gudhi_module) option(WITH_GUDHI_BENCHMARK "Activate/deactivate benchmark compilation" OFF) option(WITH_GUDHI_EXAMPLE "Activate/deactivate examples compilation and installation" OFF) -option(WITH_NETWORK "Activate/deactivate datasets fetching test which uses the Internet" OFF) +option(WITH_GUDHI_REMOTE_TEST "Activate/deactivate datasets fetching test which uses the Internet" OFF) option(WITH_GUDHI_PYTHON "Activate/deactivate python module compilation and installation" ON) option(WITH_GUDHI_TEST "Activate/deactivate examples compilation and installation" ON) option(WITH_GUDHI_UTILITIES "Activate/deactivate utilities compilation and installation" ON) diff --git a/src/common/doc/installation.h b/src/common/doc/installation.h index 72d4b1e5..b0fbdf20 100644 --- a/src/common/doc/installation.h +++ b/src/common/doc/installation.h @@ -40,8 +40,8 @@ make \endverbatim * `make test` is using Ctest (CMake test driver * program). If some of the tests are failing, please send us the result of the following command: * \verbatim ctest --output-on-failure \endverbatim - * Testing fetching datasets feature requires the use of the internet and is disabled by default. If you want to include this test, set WITH_NETWORK to ON when building in the previous step (note that this test is included in the python module): - * \verbatim cmake -DCMAKE_BUILD_TYPE=Release -DWITH_GUDHI_TEST=ON -DWITH_NETWORK=ON --DWITH_GUDHI_PYTHON=ON .. \endverbatim + * Testing fetching datasets feature requires the use of the internet and is disabled by default. 
If you want to include this test, set WITH_GUDHI_REMOTE_TEST to ON when building in the previous step (note that this test is included in the python module): + * \verbatim cmake -DCMAKE_BUILD_TYPE=Release -DWITH_GUDHI_TEST=ON -DWITH_GUDHI_REMOTE_TEST=ON --DWITH_GUDHI_PYTHON=ON .. \endverbatim * * \subsection documentationgeneration Documentation * To generate the documentation, Doxygen is required. diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index 6c8dfe32..ddb5c9c2 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -543,7 +543,7 @@ if(PYTHONINTERP_FOUND) endif() # Fetch remote datasets - if(WITH_NETWORK) + if(WITH_GUDHI_REMOTE_TEST) add_gudhi_py_test(test_remote_datasets) endif() -- cgit v1.2.3 From 5db7ab2b55262a88c0ceecbb9c7ea004d9ed087e Mon Sep 17 00:00:00 2001 From: Hind-M Date: Mon, 25 Oct 2021 15:34:03 +0200 Subject: Enable WITH_GUDHI_REMOTE_TEST option for python target in CI platforms --- .appveyor.yml | 11 ++++------- .circleci/config.yml | 20 +------------------- .github/for_maintainers/tests_strategy.md | 12 +++++------- azure-pipelines.yml | 2 +- src/cmake/modules/GUDHI_modules.cmake | 6 +++--- 5 files changed, 14 insertions(+), 37 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index ee6067e0..e90f1b83 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -27,10 +27,7 @@ environment: CMAKE_FLAGS: -DWITH_GUDHI_EXAMPLE=OFF -DWITH_GUDHI_TEST=OFF -DWITH_GUDHI_UTILITIES=ON -DWITH_GUDHI_PYTHON=OFF - target: Python - CMAKE_FLAGS: -DWITH_GUDHI_EXAMPLE=OFF -DWITH_GUDHI_TEST=OFF -DWITH_GUDHI_UTILITIES=OFF -DWITH_GUDHI_PYTHON=ON - - - target: PythonTestsWithNetwork - CMAKE_FLAGS: -DWITH_GUDHI_EXAMPLE=OFF -DWITH_GUDHI_TEST=ON -DWITH_GUDHI_REMOTE_TEST=ON -DWITH_GUDHI_UTILITIES=OFF -DWITH_GUDHI_PYTHON=ON + CMAKE_FLAGS: -DWITH_GUDHI_EXAMPLE=OFF -DWITH_GUDHI_TEST=OFF -DWITH_GUDHI_UTILITIES=OFF -DWITH_GUDHI_PYTHON=ON -DWITH_GUDHI_REMOTE_TEST=ON cache: @@ -59,12 +56,12 @@ build_script: - mkdir build - cd build - cmake 
-G "Visual Studio 15 2017 Win64" %CMAKE_FLAGS% %CMAKE_GMP_FLAGS% %CMAKE_MPFR_FLAGS% %CMAKE_VCPKG_FLAGS% .. - - if or ([%target%]==[Python]) ([%target%]==[PythonTestsWithNetwork]) { + - if [%target%]==[Python] ( cd src/python & type setup.py & MSBuild Cython.sln /m /p:Configuration=Release /p:Platform=x64 & ctest -j 1 --output-on-failure -C Release - } else { + ) else ( MSBuild GUDHIdev.sln /m /p:Configuration=Release /p:Platform=x64 & ctest -j 1 --output-on-failure -C Release -E diff_files - } + ) diff --git a/.circleci/config.yml b/.circleci/config.yml index 262e124b..90737006 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -61,7 +61,7 @@ jobs: cmake -DUSER_VERSION_DIR=version .. make user_version cd version - cmake -DCMAKE_BUILD_TYPE=Release -DWITH_GUDHI_EXAMPLE=OFF -DWITH_GUDHI_UTILITIES=OFF -DWITH_GUDHI_PYTHON=ON -DPython_ADDITIONAL_VERSIONS=3 . + cmake -DCMAKE_BUILD_TYPE=Release -DWITH_GUDHI_EXAMPLE=OFF -DWITH_GUDHI_UTILITIES=OFF -DWITH_GUDHI_PYTHON=ON -DPython_ADDITIONAL_VERSIONS=3 -DWITH_GUDHI_REMOTE_TEST=ON . cd python python3 setup.py build_ext --inplace make sphinx @@ -77,23 +77,6 @@ jobs: path: /tmp/htmlcov destination: htmlcov - python_tests_with_network: - docker: - - image: gudhi/ci_for_gudhi:latest - steps: - - checkout - - run: - name: Build and test python module with network - command: | - git submodule init - git submodule update - mkdir build - cd build - cmake -DCMAKE_BUILD_TYPE=Release -DWITH_GUDHI_EXAMPLE=OFF -DWITH_GUDHI_UTILITIES=OFF -DWITH_GUDHI_PYTHON=ON -DPython_ADDITIONAL_VERSIONS=3 -DWITH_GUDHI_TEST=ON -DWITH_GUDHI_REMOTE_TEST=ON .. 
- cd src/python - python3 setup.py build_ext --inplace - ctest --output-on-failure - doxygen: docker: - image: gudhi/ci_for_gudhi:latest @@ -262,5 +245,4 @@ workflows: - tests - utils - python - - python_tests_with_network - doxygen diff --git a/.github/for_maintainers/tests_strategy.md b/.github/for_maintainers/tests_strategy.md index 610e1749..2bba3f42 100644 --- a/.github/for_maintainers/tests_strategy.md +++ b/.github/for_maintainers/tests_strategy.md @@ -14,8 +14,7 @@ As all the third parties are already installed (thanks to docker), the compilati * tests (C++) * utils (C++) * doxygen (C++ documentation that is available in the artefacts) -* python (including documentation and code coverage that are available in the artefacts) -* python_tests_with_network (includes previous python with WITH_GUDHI_REMOTE_TEST option enabled which adds datasets fetching test) +* python (including documentation and code coverage that are available in the artefacts; here the WITH_GUDHI_REMOTE_TEST option is enabled which adds datasets fetching test) (cf. `.circleci/config.yml`) @@ -40,13 +39,12 @@ docker push gudhi/ci_for_gudhi_wo_cgal:latest ### Windows -The compilations has been seperated by categories to be parallelized, but I don't know why builds are not run in parallel: +The compilations has been separated by categories to be parallelized, but I don't know why builds are not run in parallel: * examples (C++) * tests (C++) * utils (C++) -* python -* python tests with network +* python (here the WITH_GUDHI_REMOTE_TEST option is enabled which adds datasets fetching test) Doxygen (C++) is not tested. (cf. 
`.appveyor.yml`) @@ -56,12 +54,12 @@ In case of installation issue, check in [vcpkg issues](https://github.com/micros ### OSx -The compilations has been seperated by categories to be parallelized: +The compilations has been separated by categories to be parallelized: * examples (C++) * tests (C++) * utils (C++) -* python +* python (here the WITH_GUDHI_REMOTE_TEST option is enabled which adds datasets fetching test) * Doxygen (C++) (cf. `azure-pipelines.yml`) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 6c194f2a..6e102b83 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -30,7 +30,7 @@ jobs: source activate gudhi_build_env mkdir build cd build - cmake -DCMAKE_BUILD_TYPE:STRING=$(cmakeBuildType) -DWITH_GUDHI_TEST=ON -DWITH_GUDHI_UTILITIES=ON -DWITH_GUDHI_PYTHON=ON -DPython_ADDITIONAL_VERSIONS=3 .. + cmake -DCMAKE_BUILD_TYPE:STRING=$(cmakeBuildType) -DWITH_GUDHI_TEST=ON -DWITH_GUDHI_UTILITIES=ON -DWITH_GUDHI_PYTHON=ON -DPython_ADDITIONAL_VERSIONS=3 -DWITH_GUDHI_REMOTE_TEST=ON .. 
make -j 4 make doxygen ctest -j 4 --output-on-failure # -E sphinx remove sphinx build as it fails diff --git a/src/cmake/modules/GUDHI_modules.cmake b/src/cmake/modules/GUDHI_modules.cmake index 7cdce307..cbed6351 100644 --- a/src/cmake/modules/GUDHI_modules.cmake +++ b/src/cmake/modules/GUDHI_modules.cmake @@ -18,11 +18,11 @@ function(add_gudhi_module file_path) endfunction(add_gudhi_module) option(WITH_GUDHI_BENCHMARK "Activate/deactivate benchmark compilation" OFF) -option(WITH_GUDHI_EXAMPLE "Activate/deactivate examples compilation and installation" OFF) +option(WITH_GUDHI_EXAMPLE "Activate/deactivate examples compilation" OFF) option(WITH_GUDHI_REMOTE_TEST "Activate/deactivate datasets fetching test which uses the Internet" OFF) option(WITH_GUDHI_PYTHON "Activate/deactivate python module compilation and installation" ON) -option(WITH_GUDHI_TEST "Activate/deactivate examples compilation and installation" ON) -option(WITH_GUDHI_UTILITIES "Activate/deactivate utilities compilation and installation" ON) +option(WITH_GUDHI_TEST "Activate/deactivate tests compilation" ON) +option(WITH_GUDHI_UTILITIES "Activate/deactivate utilities compilation" ON) if (WITH_GUDHI_BENCHMARK) set(GUDHI_SUB_DIRECTORIES "${GUDHI_SUB_DIRECTORIES};benchmark") -- cgit v1.2.3 From aa600c433e1f756bec4323e29e86786b937d9443 Mon Sep 17 00:00:00 2001 From: Hind-M Date: Mon, 15 Nov 2021 11:12:27 +0100 Subject: Print files licenses when available Wrap bunny fetching Add corresponding tests --- src/python/gudhi/datasets/remote.py | 38 ++++++++++++++++++++++- src/python/test/test_remote_datasets.py | 55 ++++++++++++++++++++++++++------- 2 files changed, 81 insertions(+), 12 deletions(-) diff --git a/src/python/gudhi/datasets/remote.py b/src/python/gudhi/datasets/remote.py index aef4b277..7e8f9ce7 100644 --- a/src/python/gudhi/datasets/remote.py +++ b/src/python/gudhi/datasets/remote.py @@ -39,7 +39,7 @@ def _checksum_sha256(file_path): sha256_hash.update(buffer) return sha256_hash.hexdigest() -def 
fetch(url, filename, dirname = "remote_datasets", file_checksum = None): +def fetch(url, filename, dirname = "remote_datasets", file_checksum = None, accept_license = False): """ Fetch the wanted dataset from the given url and save it in file_path @@ -54,6 +54,9 @@ def fetch(url, filename, dirname = "remote_datasets", file_checksum = None): file_checksum : string The file checksum using sha256 to check against the one computed on the downloaded file. Default is 'None'. + accept_license : boolean + Flag to specify if user accepts the file LICENSE and prevents from printing the corresponding license terms. + Default is False Returns ------- @@ -69,6 +72,7 @@ def fetch(url, filename, dirname = "remote_datasets", file_checksum = None): if not exists(dirname): makedirs(dirname) + # Get the file urlretrieve(url, file_path) if file_checksum is not None: @@ -78,6 +82,13 @@ def fetch(url, filename, dirname = "remote_datasets", file_checksum = None): "different from expected : {}." "The file may be corrupted or the given url may be wrong !".format(file_path, checksum, file_checksum)) + # Print license terms unless accept_license is set to True + if not accept_license: + license_file = join(dirname, "LICENSE") + if exists(license_file) and (file_path != license_file): + with open(license_file, 'r') as f: + print(f.read()) + return file_path def fetch_spiral_2d(filename = "spiral_2d.csv", dirname = "remote_datasets"): @@ -98,3 +109,28 @@ def fetch_spiral_2d(filename = "spiral_2d.csv", dirname = "remote_datasets"): """ return fetch("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/spiral_2d.csv", filename, dirname, '37530355d980d957c4ec06b18c775f90a91e446107d06c6201c9b4000b077f38') + +def fetch_bunny(filename = "bunny.off", dirname = "remote_datasets/bunny", accept_license = False): + """ + Fetch bunny.off remotely and its LICENSE file + + Parameters + ---------- + filename : string + The name to give to downloaded file. 
Default is "bunny.off" + dirname : string + The directory to save the file to. Default is "remote_datasets/bunny". + accept_license : boolean + Flag to specify if user accepts the file LICENSE and prevents from printing the corresponding license terms. + Default is False + + Returns + ------- + files_paths: list of strings + Full paths of the created file and its LICENSE. + """ + + return [fetch("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points//bunny/LICENSE", "LICENSE", dirname, + 'aeb1bad319b7d74fa0b8076358182f9c6b1284c67cc07dc67cbc9bc73025d956'), + fetch("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points//bunny/bunny.off", filename, dirname, + '11852d5e73e2d4bd7b86a2c5cc8a5884d0fbb72539493e8cec100ea922b19f5b', accept_license)] diff --git a/src/python/test/test_remote_datasets.py b/src/python/test/test_remote_datasets.py index e252980d..e777abc6 100644 --- a/src/python/test/test_remote_datasets.py +++ b/src/python/test/test_remote_datasets.py @@ -11,36 +11,69 @@ from gudhi.datasets import remote import re import os.path +import io +import sys import pytest -def check_dir_file_names(path_file_dw, filename, dirname): +def _check_dir_file_names(path_file_dw, filename, dirname): assert os.path.isfile(path_file_dw) names_dw = re.split(r' |/|\\', path_file_dw) - assert dirname == names_dw[0] - assert filename == names_dw[1] + # Case where inner directories are created in "remote_datasets/"; e.g: "remote_datasets/bunny" + if len(names_dw) >= 3: + for i in range(len(names_dw)-1): + assert re.split(r' |/|\\', dirname)[i] == names_dw[i] + assert filename == names_dw[i+1] + else: + assert dirname == names_dw[0] + assert filename == names_dw[1] -def check_fetch_output(url, filename, dirname = "remote_datasets", file_checksum = None): +def _check_fetch_output(url, filename, dirname = "remote_datasets", file_checksum = None): path_file_dw = remote.fetch(url, filename, dirname, file_checksum) - check_dir_file_names(path_file_dw, filename, dirname) + 
_check_dir_file_names(path_file_dw, filename, dirname) + +def _get_bunny_license_print(accept_license = False): + capturedOutput = io.StringIO() + # Redirect stdout + sys.stdout = capturedOutput + remote.fetch("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points//bunny/bunny.off", "bunny.off", "remote_datasets/bunny", + '11852d5e73e2d4bd7b86a2c5cc8a5884d0fbb72539493e8cec100ea922b19f5b', accept_license) + # Reset redirect + sys.stdout = sys.__stdout__ + return capturedOutput def test_fetch_remote_datasets(): # Test fetch with a wrong checksum with pytest.raises(OSError): - check_fetch_output("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/spiral_2d.csv", "spiral_2d.csv", file_checksum = 'XXXXXXXXXX') + _check_fetch_output("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/spiral_2d.csv", "spiral_2d.csv", file_checksum = 'XXXXXXXXXX') # Test files download from given urls with checksums provided - check_fetch_output("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/spiral_2d.csv", "spiral_2d.csv", + _check_fetch_output("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/spiral_2d.csv", "spiral_2d.csv", file_checksum = '37530355d980d957c4ec06b18c775f90a91e446107d06c6201c9b4000b077f38') - check_fetch_output("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/sphere3D_pts_on_grid.off", "sphere3D_pts_on_grid.off", + _check_fetch_output("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/sphere3D_pts_on_grid.off", "sphere3D_pts_on_grid.off", file_checksum = '32f96d2cafb1177f0dd5e0a019b6ff5658e14a619a7815ae55ad0fc5e8bd3f88') # Test files download from given urls without checksums - check_fetch_output("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/spiral_2d.csv", "spiral_2d.csv") + _check_fetch_output("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/spiral_2d.csv", "spiral_2d.csv") - 
check_fetch_output("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/sphere3D_pts_on_grid.off", "sphere3D_pts_on_grid.off") + _check_fetch_output("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/sphere3D_pts_on_grid.off", "sphere3D_pts_on_grid.off") # Test spiral_2d.csv wrapping function path_file_dw = remote.fetch_spiral_2d() - check_dir_file_names(path_file_dw, 'spiral_2d.csv', 'remote_datasets') + _check_dir_file_names(path_file_dw, 'spiral_2d.csv', 'remote_datasets') + + # Test printing existing LICENSE file when fetching bunny.off with accept_license = False (default) + # Fetch LICENSE file + remote.fetch("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points//bunny/LICENSE", "LICENSE", "remote_datasets/bunny", + 'aeb1bad319b7d74fa0b8076358182f9c6b1284c67cc07dc67cbc9bc73025d956') + with open("remote_datasets/bunny/LICENSE") as f: + assert f.read() == _get_bunny_license_print().getvalue().rstrip("\n") + + # Test not printing bunny.off LICENSE when accept_license = True + assert "" == _get_bunny_license_print(accept_license = True).getvalue() + + # Test fetch_bunny wrapping function + path_file_dw = remote.fetch_bunny() + _check_dir_file_names(path_file_dw[0], 'LICENSE', 'remote_datasets/bunny') + _check_dir_file_names(path_file_dw[1], 'bunny.off', 'remote_datasets/bunny') -- cgit v1.2.3 From d941ebc854880a06707999f677137a9d6ff7473f Mon Sep 17 00:00:00 2001 From: Hind-M Date: Wed, 26 Jan 2022 15:21:20 +0100 Subject: Add datasets remote fetching module to doc --- src/python/doc/datasets.inc | 14 ++++ src/python/doc/datasets.rst | 118 +++++++++++++++++++++++++++++++++ src/python/doc/datasets_generators.inc | 14 ---- src/python/doc/datasets_generators.rst | 105 ----------------------------- src/python/doc/index.rst | 6 +- 5 files changed, 135 insertions(+), 122 deletions(-) create mode 100644 src/python/doc/datasets.inc create mode 100644 src/python/doc/datasets.rst delete mode 100644 src/python/doc/datasets_generators.inc 
delete mode 100644 src/python/doc/datasets_generators.rst diff --git a/src/python/doc/datasets.inc b/src/python/doc/datasets.inc new file mode 100644 index 00000000..95a87678 --- /dev/null +++ b/src/python/doc/datasets.inc @@ -0,0 +1,14 @@ +.. table:: + :widths: 30 40 30 + + +-----------------------------------+--------------------------------------------+--------------------------------------------------------------------------------------+ + | .. figure:: | Datasets either generated or fetched. | :Authors: Hind Montassif | + | img/sphere_3d.png | | | + | | | :Since: GUDHI 3.5.0 | + | | | | + | | | :License: MIT (`LGPL v3 `_) | + | | | | + | | | :Requires: `CGAL `_ | + +-----------------------------------+--------------------------------------------+--------------------------------------------------------------------------------------+ + | * :doc:`datasets` | + +-----------------------------------+-----------------------------------------------------------------------------------------------------------------------------------+ diff --git a/src/python/doc/datasets.rst b/src/python/doc/datasets.rst new file mode 100644 index 00000000..4fa8a628 --- /dev/null +++ b/src/python/doc/datasets.rst @@ -0,0 +1,118 @@ + +:orphan: + +.. To get rid of WARNING: document isn't included in any toctree + +================ +Datasets manual +================ + +Datasets generators +=================== + +We provide the generation of different customizable datasets to use as inputs for Gudhi complexes and data structures. + +Points generators +------------------ + +The module **points** enables the generation of random points on a sphere, random points on a torus and as a grid. + +Points on sphere +^^^^^^^^^^^^^^^^ + +The function **sphere** enables the generation of random i.i.d. points uniformly on a (d-1)-sphere in :math:`R^d`. +The user should provide the number of points to be generated on the sphere :code:`n_samples` and the ambient dimension :code:`ambient_dim`. 
+The :code:`radius` of sphere is optional and is equal to **1** by default. +Only random points generation is currently available. + +The generated points are given as an array of shape :math:`(n\_samples, ambient\_dim)`. + +Example +""""""" + +.. code-block:: python + + from gudhi.datasets.generators import points + from gudhi import AlphaComplex + + # Generate 50 points on a sphere in R^2 + gen_points = points.sphere(n_samples = 50, ambient_dim = 2, radius = 1, sample = "random") + + # Create an alpha complex from the generated points + alpha_complex = AlphaComplex(points = gen_points) + +.. autofunction:: gudhi.datasets.generators.points.sphere + +Points on a flat torus +^^^^^^^^^^^^^^^^^^^^^^ + +You can also generate points on a torus. + +Two functions are available and give the same output: the first one depends on **CGAL** and the second does not and consists of full python code. + +On another hand, two sample types are provided: you can either generate i.i.d. points on a d-torus in :math:`R^{2d}` *randomly* or on a *grid*. + +First function: **ctorus** +""""""""""""""""""""""""""" + +The user should provide the number of points to be generated on the torus :code:`n_samples`, and the dimension :code:`dim` of the torus on which points would be generated in :math:`R^{2dim}`. +The :code:`sample` argument is optional and is set to **'random'** by default. +In this case, the returned generated points would be an array of shape :math:`(n\_samples, 2*dim)`. +Otherwise, if set to **'grid'**, the points are generated on a grid and would be given as an array of shape: + +.. math:: + + ( ⌊n\_samples^{1 \over {dim}}⌋^{dim}, 2*dim ) + +**Note 1:** The output array first shape is rounded down to the closest perfect :math:`dim^{th}` power. + +**Note 2:** This version is recommended when the user wishes to use **'grid'** as sample type, or **'random'** with a relatively small number of samples (~ less than 150). + +Example +""""""" +.. 
code-block:: python + + from gudhi.datasets.generators import points + + # Generate 50 points randomly on a torus in R^6 + gen_points = points.ctorus(n_samples = 50, dim = 3) + + # Generate 27 points on a torus as a grid in R^6 + gen_points = points.ctorus(n_samples = 50, dim = 3, sample = 'grid') + +.. autofunction:: gudhi.datasets.generators.points.ctorus + +Second function: **torus** +""""""""""""""""""""""""""" + +The user should provide the number of points to be generated on the torus :code:`n_samples` and the dimension :code:`dim` of the torus on which points would be generated in :math:`R^{2dim}`. +The :code:`sample` argument is optional and is set to **'random'** by default. +The other allowed value of sample type is **'grid'**. + +**Note:** This version is recommended when the user wishes to use **'random'** as sample type with a great number of samples and a low dimension. + +Example +""""""" +.. code-block:: python + + from gudhi.datasets.generators import points + + # Generate 50 points randomly on a torus in R^6 + gen_points = points.torus(n_samples = 50, dim = 3) + + # Generate 27 points on a torus as a grid in R^6 + gen_points = points.torus(n_samples = 50, dim = 3, sample = 'grid') + + +.. autofunction:: gudhi.datasets.generators.points.torus + + +Fetching datasets +================= + +We provide some ready-to-use datasets that are not available by default when getting GUDHI, and need to be fetched explicitly. + +.. automodule:: gudhi.datasets.remote + :members: + :special-members: + :show-inheritance: diff --git a/src/python/doc/datasets_generators.inc b/src/python/doc/datasets_generators.inc deleted file mode 100644 index 8d169275..00000000 --- a/src/python/doc/datasets_generators.inc +++ /dev/null @@ -1,14 +0,0 @@ -.. table:: - :widths: 30 40 30 - - +-----------------------------------+--------------------------------------------+--------------------------------------------------------------------------------------+ - | .. 
figure:: | Datasets generators (points). | :Authors: Hind Montassif | - | img/sphere_3d.png | | | - | | | :Since: GUDHI 3.5.0 | - | | | | - | | | :License: MIT (`LGPL v3 `_) | - | | | | - | | | :Requires: `CGAL `_ | - +-----------------------------------+--------------------------------------------+--------------------------------------------------------------------------------------+ - | * :doc:`datasets_generators` | - +-----------------------------------+-----------------------------------------------------------------------------------------------------------------------------------+ diff --git a/src/python/doc/datasets_generators.rst b/src/python/doc/datasets_generators.rst deleted file mode 100644 index 260c3882..00000000 --- a/src/python/doc/datasets_generators.rst +++ /dev/null @@ -1,105 +0,0 @@ - -:orphan: - -.. To get rid of WARNING: document isn't included in any toctree - -=========================== -Datasets generators manual -=========================== - -We provide the generation of different customizable datasets to use as inputs for Gudhi complexes and data structures. - - -Points generators ------------------- - -The module **points** enables the generation of random points on a sphere, random points on a torus and as a grid. - -Points on sphere -^^^^^^^^^^^^^^^^ - -The function **sphere** enables the generation of random i.i.d. points uniformly on a (d-1)-sphere in :math:`R^d`. -The user should provide the number of points to be generated on the sphere :code:`n_samples` and the ambient dimension :code:`ambient_dim`. -The :code:`radius` of sphere is optional and is equal to **1** by default. -Only random points generation is currently available. - -The generated points are given as an array of shape :math:`(n\_samples, ambient\_dim)`. - -Example -""""""" - -.. 
code-block:: python - - from gudhi.datasets.generators import points - from gudhi import AlphaComplex - - # Generate 50 points on a sphere in R^2 - gen_points = points.sphere(n_samples = 50, ambient_dim = 2, radius = 1, sample = "random") - - # Create an alpha complex from the generated points - alpha_complex = AlphaComplex(points = gen_points) - -.. autofunction:: gudhi.datasets.generators.points.sphere - -Points on a flat torus -^^^^^^^^^^^^^^^^^^^^^^ - -You can also generate points on a torus. - -Two functions are available and give the same output: the first one depends on **CGAL** and the second does not and consists of full python code. - -On another hand, two sample types are provided: you can either generate i.i.d. points on a d-torus in :math:`R^{2d}` *randomly* or on a *grid*. - -First function: **ctorus** -""""""""""""""""""""""""""" - -The user should provide the number of points to be generated on the torus :code:`n_samples`, and the dimension :code:`dim` of the torus on which points would be generated in :math:`R^{2dim}`. -The :code:`sample` argument is optional and is set to **'random'** by default. -In this case, the returned generated points would be an array of shape :math:`(n\_samples, 2*dim)`. -Otherwise, if set to **'grid'**, the points are generated on a grid and would be given as an array of shape: - -.. math:: - - ( ⌊n\_samples^{1 \over {dim}}⌋^{dim}, 2*dim ) - -**Note 1:** The output array first shape is rounded down to the closest perfect :math:`dim^{th}` power. - -**Note 2:** This version is recommended when the user wishes to use **'grid'** as sample type, or **'random'** with a relatively small number of samples (~ less than 150). - -Example -""""""" -.. 
code-block:: python - - from gudhi.datasets.generators import points - - # Generate 50 points randomly on a torus in R^6 - gen_points = points.ctorus(n_samples = 50, dim = 3) - - # Generate 27 points on a torus as a grid in R^6 - gen_points = points.ctorus(n_samples = 50, dim = 3, sample = 'grid') - -.. autofunction:: gudhi.datasets.generators.points.ctorus - -Second function: **torus** -""""""""""""""""""""""""""" - -The user should provide the number of points to be generated on the torus :code:`n_samples` and the dimension :code:`dim` of the torus on which points would be generated in :math:`R^{2dim}`. -The :code:`sample` argument is optional and is set to **'random'** by default. -The other allowed value of sample type is **'grid'**. - -**Note:** This version is recommended when the user wishes to use **'random'** as sample type with a great number of samples and a low dimension. - -Example -""""""" -.. code-block:: python - - from gudhi.datasets.generators import points - - # Generate 50 points randomly on a torus in R^6 - gen_points = points.torus(n_samples = 50, dim = 3) - - # Generate 27 points on a torus as a grid in R^6 - gen_points = points.torus(n_samples = 50, dim = 3, sample = 'grid') - - -.. autofunction:: gudhi.datasets.generators.points.torus diff --git a/src/python/doc/index.rst b/src/python/doc/index.rst index 2d7921ae..35f4ba46 100644 --- a/src/python/doc/index.rst +++ b/src/python/doc/index.rst @@ -92,7 +92,7 @@ Clustering .. include:: clustering.inc -Datasets generators -******************* +Datasets +******** -.. include:: datasets_generators.inc +.. 
include:: datasets.inc -- cgit v1.2.3 From 8d1e7aeb3416194d00f45587d1ecea85ba218028 Mon Sep 17 00:00:00 2001 From: Hind-M Date: Fri, 28 Jan 2022 16:21:33 +0100 Subject: Return arrays of points instead of files paths when fetching bunny.npy and spiral_2d.csv --- src/python/gudhi/datasets/remote.py | 83 +++++++++++++++++++++------------ src/python/test/test_remote_datasets.py | 33 +++++++------ 2 files changed, 72 insertions(+), 44 deletions(-) diff --git a/src/python/gudhi/datasets/remote.py b/src/python/gudhi/datasets/remote.py index 7e8f9ce7..ef797417 100644 --- a/src/python/gudhi/datasets/remote.py +++ b/src/python/gudhi/datasets/remote.py @@ -7,17 +7,17 @@ # Modification(s): # - YYYY/MM Author: Description of the modification -import hashlib - from os.path import join, exists from os import makedirs from urllib.request import urlretrieve +import hashlib +import numpy as np def _checksum_sha256(file_path): """ - Compute the file checksum using sha256 + Compute the file checksum using sha256. Parameters ---------- @@ -26,7 +26,7 @@ def _checksum_sha256(file_path): Returns ------- - The hex digest of file_path + The hex digest of file_path. """ sha256_hash = hashlib.sha256() chunk_size = 4096 @@ -39,9 +39,9 @@ def _checksum_sha256(file_path): sha256_hash.update(buffer) return sha256_hash.hexdigest() -def fetch(url, filename, dirname = "remote_datasets", file_checksum = None, accept_license = False): +def _fetch_remote(url, filename, dirname = "remote_datasets", file_checksum = None, accept_license = False): """ - Fetch the wanted dataset from the given url and save it in file_path + Fetch the wanted dataset from the given url and save it in file_path. Parameters ---------- @@ -56,7 +56,7 @@ def fetch(url, filename, dirname = "remote_datasets", file_checksum = None, acce Default is 'None'. accept_license : boolean Flag to specify if user accepts the file LICENSE and prevents from printing the corresponding license terms. - Default is False + Default is False. 
Returns ------- @@ -66,14 +66,8 @@ def fetch(url, filename, dirname = "remote_datasets", file_checksum = None, acce file_path = join(dirname, filename) - # Check for an already existing file at file_path - if not exists(file_path): - # Create directory if not existing - if not exists(dirname): - makedirs(dirname) - - # Get the file - urlretrieve(url, file_path) + # Get the file + urlretrieve(url, file_path) if file_checksum is not None: checksum = _checksum_sha256(file_path) @@ -93,44 +87,71 @@ def fetch(url, filename, dirname = "remote_datasets", file_checksum = None, acce def fetch_spiral_2d(filename = "spiral_2d.csv", dirname = "remote_datasets"): """ - Fetch spiral_2d.csv remotely + Fetch "spiral_2d.csv" remotely. Parameters ---------- filename : string - The name to give to downloaded file. Default is "spiral_2d.csv" + The name to give to downloaded file. Default is "spiral_2d.csv". dirname : string The directory to save the file to. Default is "remote_datasets". Returns ------- - file_path: string - Full path of the created file. + points: array + Array of points stored in "spiral_2d.csv". 
""" - return fetch("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/spiral_2d.csv", filename, dirname, - '37530355d980d957c4ec06b18c775f90a91e446107d06c6201c9b4000b077f38') + file_url = "https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/spiral_2d.csv" + file_checksum = '37530355d980d957c4ec06b18c775f90a91e446107d06c6201c9b4000b077f38' -def fetch_bunny(filename = "bunny.off", dirname = "remote_datasets/bunny", accept_license = False): + archive_path = join(dirname, filename) + + if not exists(archive_path): + # Create directory if not existing + if not exists(dirname): + makedirs(dirname) + + file_path_pkl = _fetch_remote(file_url, filename, dirname, file_checksum) + + return np.loadtxt(file_path_pkl) + else: + return np.loadtxt(archive_path) + +def fetch_bunny(filename = "bunny.npy", dirname = "remote_datasets/bunny", accept_license = False): """ - Fetch bunny.off remotely and its LICENSE file + Fetch "bunny.npy" remotely and its LICENSE file. Parameters ---------- filename : string - The name to give to downloaded file. Default is "bunny.off" + The name to give to downloaded file. Default is "bunny.npy". dirname : string The directory to save the file to. Default is "remote_datasets/bunny". accept_license : boolean Flag to specify if user accepts the file LICENSE and prevents from printing the corresponding license terms. - Default is False + Default is False. Returns ------- - files_paths: list of strings - Full paths of the created file and its LICENSE. + points: array + Array of points stored in "bunny.npy". 
""" - return [fetch("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points//bunny/LICENSE", "LICENSE", dirname, - 'aeb1bad319b7d74fa0b8076358182f9c6b1284c67cc07dc67cbc9bc73025d956'), - fetch("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points//bunny/bunny.off", filename, dirname, - '11852d5e73e2d4bd7b86a2c5cc8a5884d0fbb72539493e8cec100ea922b19f5b', accept_license)] + file_url = "https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/bunny/bunny.npy" + file_checksum = '13f7842ebb4b45370e50641ff28c88685703efa5faab14edf0bb7d113a965e1b' + license_url = "https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/bunny/LICENSE" + license_checksum = 'aeb1bad319b7d74fa0b8076358182f9c6b1284c67cc07dc67cbc9bc73025d956' + + archive_path = join(dirname, filename) + + if not exists(archive_path): + # Create directory if not existing + if not exists(dirname): + makedirs(dirname) + + license_path = _fetch_remote(license_url, "LICENSE", dirname, license_checksum) + file_path_pkl = _fetch_remote(file_url, filename, dirname, file_checksum, accept_license) + + return np.load(file_path_pkl, mmap_mode='r') + else: + return np.load(archive_path, mmap_mode='r') diff --git a/src/python/test/test_remote_datasets.py b/src/python/test/test_remote_datasets.py index e777abc6..56a273b4 100644 --- a/src/python/test/test_remote_datasets.py +++ b/src/python/test/test_remote_datasets.py @@ -10,13 +10,14 @@ from gudhi.datasets import remote import re -import os.path +from os.path import isfile, exists +from os import makedirs import io import sys import pytest def _check_dir_file_names(path_file_dw, filename, dirname): - assert os.path.isfile(path_file_dw) + assert isfile(path_file_dw) names_dw = re.split(r' |/|\\', path_file_dw) # Case where inner directories are created in "remote_datasets/"; e.g: "remote_datasets/bunny" @@ -29,15 +30,20 @@ def _check_dir_file_names(path_file_dw, filename, dirname): assert filename == names_dw[1] def _check_fetch_output(url, 
filename, dirname = "remote_datasets", file_checksum = None): - path_file_dw = remote.fetch(url, filename, dirname, file_checksum) + if not exists(dirname): + makedirs(dirname) + path_file_dw = remote._fetch_remote(url, filename, dirname, file_checksum) _check_dir_file_names(path_file_dw, filename, dirname) def _get_bunny_license_print(accept_license = False): capturedOutput = io.StringIO() # Redirect stdout sys.stdout = capturedOutput - remote.fetch("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points//bunny/bunny.off", "bunny.off", "remote_datasets/bunny", - '11852d5e73e2d4bd7b86a2c5cc8a5884d0fbb72539493e8cec100ea922b19f5b', accept_license) + + if not exists("remote_datasets/bunny"): + makedirs("remote_datasets/bunny") + remote._fetch_remote("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/bunny/bunny.npy", "bunny.npy", "remote_datasets/bunny", + '13f7842ebb4b45370e50641ff28c88685703efa5faab14edf0bb7d113a965e1b', accept_license) # Reset redirect sys.stdout = sys.__stdout__ return capturedOutput @@ -60,20 +66,21 @@ def test_fetch_remote_datasets(): _check_fetch_output("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/sphere3D_pts_on_grid.off", "sphere3D_pts_on_grid.off") # Test spiral_2d.csv wrapping function - path_file_dw = remote.fetch_spiral_2d() - _check_dir_file_names(path_file_dw, 'spiral_2d.csv', 'remote_datasets') + spiral_2d_arr = remote.fetch_spiral_2d() + assert spiral_2d_arr.shape == (114562, 2) - # Test printing existing LICENSE file when fetching bunny.off with accept_license = False (default) + # Test printing existing LICENSE file when fetching bunny.npy with accept_license = False (default) # Fetch LICENSE file - remote.fetch("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points//bunny/LICENSE", "LICENSE", "remote_datasets/bunny", + if not exists("remote_datasets/bunny"): + makedirs("remote_datasets/bunny") + 
remote._fetch_remote("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/bunny/LICENSE", "LICENSE", "remote_datasets/bunny", 'aeb1bad319b7d74fa0b8076358182f9c6b1284c67cc07dc67cbc9bc73025d956') with open("remote_datasets/bunny/LICENSE") as f: assert f.read() == _get_bunny_license_print().getvalue().rstrip("\n") - # Test not printing bunny.off LICENSE when accept_license = True + # Test not printing bunny.npy LICENSE when accept_license = True assert "" == _get_bunny_license_print(accept_license = True).getvalue() # Test fetch_bunny wrapping function - path_file_dw = remote.fetch_bunny() - _check_dir_file_names(path_file_dw[0], 'LICENSE', 'remote_datasets/bunny') - _check_dir_file_names(path_file_dw[1], 'bunny.off', 'remote_datasets/bunny') + bunny_arr = remote.fetch_bunny() + assert bunny_arr.shape == (35947, 3) -- cgit v1.2.3 From ad7a50fb87ed4237b9a02165eac39ae355dd5440 Mon Sep 17 00:00:00 2001 From: Hind-M Date: Tue, 1 Feb 2022 10:32:03 +0100 Subject: Fetch spiral_2d.npy file instead of csv Add some modifications related to those done on files in gudhi-data --- src/python/gudhi/datasets/remote.py | 20 ++++++++++---------- src/python/test/test_remote_datasets.py | 14 +++++++------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/python/gudhi/datasets/remote.py b/src/python/gudhi/datasets/remote.py index ef797417..3498a645 100644 --- a/src/python/gudhi/datasets/remote.py +++ b/src/python/gudhi/datasets/remote.py @@ -85,24 +85,24 @@ def _fetch_remote(url, filename, dirname = "remote_datasets", file_checksum = No return file_path -def fetch_spiral_2d(filename = "spiral_2d.csv", dirname = "remote_datasets"): +def fetch_spiral_2d(filename = "spiral_2d.npy", dirname = "remote_datasets/spiral_2d"): """ - Fetch "spiral_2d.csv" remotely. + Fetch "spiral_2d.npy" remotely. Parameters ---------- filename : string - The name to give to downloaded file. Default is "spiral_2d.csv". + The name to give to downloaded file. Default is "spiral_2d.npy". 
dirname : string - The directory to save the file to. Default is "remote_datasets". + The directory to save the file to. Default is "remote_datasets/spiral_2d". Returns ------- points: array - Array of points stored in "spiral_2d.csv". + Array of points stored in "spiral_2d.npy". """ - file_url = "https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/spiral_2d.csv" - file_checksum = '37530355d980d957c4ec06b18c775f90a91e446107d06c6201c9b4000b077f38' + file_url = "https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/spiral_2d/spiral_2d.npy" + file_checksum = '88312ffd6df2e2cb2bde9c0e1f962d7d644c6f58dc369c7b377b298dacdc4eaf' archive_path = join(dirname, filename) @@ -113,9 +113,9 @@ def fetch_spiral_2d(filename = "spiral_2d.csv", dirname = "remote_datasets"): file_path_pkl = _fetch_remote(file_url, filename, dirname, file_checksum) - return np.loadtxt(file_path_pkl) + return np.load(file_path_pkl, mmap_mode='r') else: - return np.loadtxt(archive_path) + return np.load(archive_path, mmap_mode='r') def fetch_bunny(filename = "bunny.npy", dirname = "remote_datasets/bunny", accept_license = False): """ @@ -140,7 +140,7 @@ def fetch_bunny(filename = "bunny.npy", dirname = "remote_datasets/bunny", accep file_url = "https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/bunny/bunny.npy" file_checksum = '13f7842ebb4b45370e50641ff28c88685703efa5faab14edf0bb7d113a965e1b' license_url = "https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/bunny/LICENSE" - license_checksum = 'aeb1bad319b7d74fa0b8076358182f9c6b1284c67cc07dc67cbc9bc73025d956' + license_checksum = 'b763dbe1b2fc6015d05cbf7bcc686412a2eb100a1f2220296e3b4a644c69633a' archive_path = join(dirname, filename) diff --git a/src/python/test/test_remote_datasets.py b/src/python/test/test_remote_datasets.py index 56a273b4..2057c63b 100644 --- a/src/python/test/test_remote_datasets.py +++ b/src/python/test/test_remote_datasets.py @@ -51,21 +51,21 @@ def _get_bunny_license_print(accept_license 
= False): def test_fetch_remote_datasets(): # Test fetch with a wrong checksum with pytest.raises(OSError): - _check_fetch_output("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/spiral_2d.csv", "spiral_2d.csv", file_checksum = 'XXXXXXXXXX') + _check_fetch_output("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/spiral_2d/spiral_2d.npy", "spiral_2d.npy", file_checksum = 'XXXXXXXXXX') # Test files download from given urls with checksums provided - _check_fetch_output("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/spiral_2d.csv", "spiral_2d.csv", - file_checksum = '37530355d980d957c4ec06b18c775f90a91e446107d06c6201c9b4000b077f38') + _check_fetch_output("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/spiral_2d/spiral_2d.npy", "spiral_2d.npy", + file_checksum = '88312ffd6df2e2cb2bde9c0e1f962d7d644c6f58dc369c7b377b298dacdc4eaf') _check_fetch_output("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/sphere3D_pts_on_grid.off", "sphere3D_pts_on_grid.off", file_checksum = '32f96d2cafb1177f0dd5e0a019b6ff5658e14a619a7815ae55ad0fc5e8bd3f88') # Test files download from given urls without checksums - _check_fetch_output("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/spiral_2d.csv", "spiral_2d.csv") + _check_fetch_output("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/spiral_2d/spiral_2d.npy", "spiral_2d.npy") _check_fetch_output("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/sphere3D_pts_on_grid.off", "sphere3D_pts_on_grid.off") - # Test spiral_2d.csv wrapping function + # Test fetch_spiral_2d wrapping function spiral_2d_arr = remote.fetch_spiral_2d() assert spiral_2d_arr.shape == (114562, 2) @@ -74,9 +74,9 @@ def test_fetch_remote_datasets(): if not exists("remote_datasets/bunny"): makedirs("remote_datasets/bunny") remote._fetch_remote("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/bunny/LICENSE", "LICENSE", "remote_datasets/bunny", - 
'aeb1bad319b7d74fa0b8076358182f9c6b1284c67cc07dc67cbc9bc73025d956') + 'b763dbe1b2fc6015d05cbf7bcc686412a2eb100a1f2220296e3b4a644c69633a') with open("remote_datasets/bunny/LICENSE") as f: - assert f.read() == _get_bunny_license_print().getvalue().rstrip("\n") + assert f.read().rstrip("\n") == _get_bunny_license_print().getvalue().rstrip("\n") # Test not printing bunny.npy LICENSE when accept_license = True assert "" == _get_bunny_license_print(accept_license = True).getvalue() -- cgit v1.2.3 From 741f4f182479d1e5e78e9eb9180adce0a72e99b6 Mon Sep 17 00:00:00 2001 From: Hind-M Date: Wed, 2 Feb 2022 10:38:15 +0100 Subject: Modify remote fetching test to increase its coverage --- src/python/test/test_remote_datasets.py | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/src/python/test/test_remote_datasets.py b/src/python/test/test_remote_datasets.py index 2057c63b..dac9ee80 100644 --- a/src/python/test/test_remote_datasets.py +++ b/src/python/test/test_remote_datasets.py @@ -40,8 +40,6 @@ def _get_bunny_license_print(accept_license = False): # Redirect stdout sys.stdout = capturedOutput - if not exists("remote_datasets/bunny"): - makedirs("remote_datasets/bunny") remote._fetch_remote("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/bunny/bunny.npy", "bunny.npy", "remote_datasets/bunny", '13f7842ebb4b45370e50641ff28c88685703efa5faab14edf0bb7d113a965e1b', accept_license) # Reset redirect @@ -65,22 +63,17 @@ def test_fetch_remote_datasets(): _check_fetch_output("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/sphere3D_pts_on_grid.off", "sphere3D_pts_on_grid.off") - # Test fetch_spiral_2d wrapping function - spiral_2d_arr = remote.fetch_spiral_2d() - assert spiral_2d_arr.shape == (114562, 2) + # Test fetch_spiral_2d and fetch_bunny wrapping functions (twice, to test case of already fetched files) + for i in range(2): + spiral_2d_arr = remote.fetch_spiral_2d() + assert spiral_2d_arr.shape == (114562, 2) + + 
bunny_arr = remote.fetch_bunny() + assert bunny_arr.shape == (35947, 3) # Test printing existing LICENSE file when fetching bunny.npy with accept_license = False (default) - # Fetch LICENSE file - if not exists("remote_datasets/bunny"): - makedirs("remote_datasets/bunny") - remote._fetch_remote("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/bunny/LICENSE", "LICENSE", "remote_datasets/bunny", - 'b763dbe1b2fc6015d05cbf7bcc686412a2eb100a1f2220296e3b4a644c69633a') with open("remote_datasets/bunny/LICENSE") as f: assert f.read().rstrip("\n") == _get_bunny_license_print().getvalue().rstrip("\n") # Test not printing bunny.npy LICENSE when accept_license = True assert "" == _get_bunny_license_print(accept_license = True).getvalue() - - # Test fetch_bunny wrapping function - bunny_arr = remote.fetch_bunny() - assert bunny_arr.shape == (35947, 3) -- cgit v1.2.3 From 19689c712a1f5945e664f9c74c14b6994e7afaaf Mon Sep 17 00:00:00 2001 From: Hind-M Date: Wed, 2 Feb 2022 16:14:17 +0100 Subject: Try to fix failing test in windows --- src/python/test/test_remote_datasets.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/python/test/test_remote_datasets.py b/src/python/test/test_remote_datasets.py index dac9ee80..643485f9 100644 --- a/src/python/test/test_remote_datasets.py +++ b/src/python/test/test_remote_datasets.py @@ -40,6 +40,9 @@ def _get_bunny_license_print(accept_license = False): # Redirect stdout sys.stdout = capturedOutput + if not exists("remote_datasets/bunny"): + makedirs("remote_datasets/bunny") + remote._fetch_remote("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/bunny/bunny.npy", "bunny.npy", "remote_datasets/bunny", '13f7842ebb4b45370e50641ff28c88685703efa5faab14edf0bb7d113a965e1b', accept_license) # Reset redirect @@ -72,6 +75,11 @@ def test_fetch_remote_datasets(): assert bunny_arr.shape == (35947, 3) # Test printing existing LICENSE file when fetching bunny.npy with accept_license = False (default) + # Fetch 
LICENSE file + if not exists("remote_datasets/bunny"): + makedirs("remote_datasets/bunny") + remote._fetch_remote("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/bunny/LICENSE", "LICENSE", "remote_datasets/bunny", + 'b763dbe1b2fc6015d05cbf7bcc686412a2eb100a1f2220296e3b4a644c69633a') with open("remote_datasets/bunny/LICENSE") as f: assert f.read().rstrip("\n") == _get_bunny_license_print().getvalue().rstrip("\n") -- cgit v1.2.3 From a2d55f9bbf0f45e3ae4c147f734ce04f5bc87ab8 Mon Sep 17 00:00:00 2001 From: Hind-M Date: Wed, 2 Feb 2022 21:32:55 +0100 Subject: Another attempt to fix windows failing test: move fetch_bunny to the end --- src/python/test/test_remote_datasets.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/python/test/test_remote_datasets.py b/src/python/test/test_remote_datasets.py index 643485f9..5e607d73 100644 --- a/src/python/test/test_remote_datasets.py +++ b/src/python/test/test_remote_datasets.py @@ -66,14 +66,6 @@ def test_fetch_remote_datasets(): _check_fetch_output("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/sphere3D_pts_on_grid.off", "sphere3D_pts_on_grid.off") - # Test fetch_spiral_2d and fetch_bunny wrapping functions (twice, to test case of already fetched files) - for i in range(2): - spiral_2d_arr = remote.fetch_spiral_2d() - assert spiral_2d_arr.shape == (114562, 2) - - bunny_arr = remote.fetch_bunny() - assert bunny_arr.shape == (35947, 3) - # Test printing existing LICENSE file when fetching bunny.npy with accept_license = False (default) # Fetch LICENSE file if not exists("remote_datasets/bunny"): @@ -85,3 +77,11 @@ def test_fetch_remote_datasets(): # Test not printing bunny.npy LICENSE when accept_license = True assert "" == _get_bunny_license_print(accept_license = True).getvalue() + + # Test fetch_spiral_2d and fetch_bunny wrapping functions (twice, to test case of already fetched files) + for i in range(2): + spiral_2d_arr = remote.fetch_spiral_2d() + 
assert spiral_2d_arr.shape == (114562, 2) + + bunny_arr = remote.fetch_bunny() + assert bunny_arr.shape == (35947, 3) -- cgit v1.2.3 From 6109fd920ba477f89e83fea3df9803232c169463 Mon Sep 17 00:00:00 2001 From: Hind-M Date: Thu, 3 Feb 2022 10:24:38 +0100 Subject: Remove archive folder before testing wrapping functions --- src/python/test/test_remote_datasets.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/python/test/test_remote_datasets.py b/src/python/test/test_remote_datasets.py index 5e607d73..93a8a982 100644 --- a/src/python/test/test_remote_datasets.py +++ b/src/python/test/test_remote_datasets.py @@ -78,6 +78,9 @@ def test_fetch_remote_datasets(): # Test not printing bunny.npy LICENSE when accept_license = True assert "" == _get_bunny_license_print(accept_license = True).getvalue() + # Remove "remote_datasets" directory and all its content + import shutil + shutil.rmtree("remote_datasets") # Test fetch_spiral_2d and fetch_bunny wrapping functions (twice, to test case of already fetched files) for i in range(2): spiral_2d_arr = remote.fetch_spiral_2d() -- cgit v1.2.3 From a13282e4da9910a5d2bdadf97040095ae5b7880a Mon Sep 17 00:00:00 2001 From: Hind-M Date: Fri, 4 Feb 2022 15:39:51 +0100 Subject: Store fetched datasets in user directory by default --- src/python/gudhi/datasets/remote.py | 68 ++++++++++++++++++++++++++------- src/python/test/test_remote_datasets.py | 31 +++++++++++---- 2 files changed, 79 insertions(+), 20 deletions(-) diff --git a/src/python/gudhi/datasets/remote.py b/src/python/gudhi/datasets/remote.py index 3498a645..3d6c01b0 100644 --- a/src/python/gudhi/datasets/remote.py +++ b/src/python/gudhi/datasets/remote.py @@ -7,14 +7,52 @@ # Modification(s): # - YYYY/MM Author: Description of the modification -from os.path import join, exists +from os.path import join, exists, expanduser from os import makedirs from urllib.request import urlretrieve import hashlib +import shutil import numpy as np +def get_data_home(data_home = None): + 
""" + Return the path of the remote datasets directory. + This folder is used to store remotely fetched datasets. + By default the datasets directory is set to a folder named 'remote_datasets' in the user home folder. + Alternatively, it can be set by giving an explicit folder path. The '~' symbol is expanded to the user home folder. + If the folder does not already exist, it is automatically created. + + Parameters + ---------- + data_home : string + The path to remote datasets directory. Default is `None`, meaning that the data home directory will be set to "~/remote_datasets". + + Returns + ------- + data_home: string + The path to remote datasets directory. + """ + if data_home is None: + data_home = join("~", "remote_datasets") + data_home = expanduser(data_home) + makedirs(data_home, exist_ok=True) + return data_home + + +def clear_data_home(data_home = None): + """ + Delete all the content of the data home cache. + + Parameters + ---------- + data_home : string, default is None. + The path to remote datasets directory. If `None`, the default directory to be removed is set to "~/remote_datasets". + """ + data_home = get_data_home(data_home) + shutil.rmtree(data_home) + def _checksum_sha256(file_path): """ Compute the file checksum using sha256. @@ -85,7 +123,7 @@ def _fetch_remote(url, filename, dirname = "remote_datasets", file_checksum = No return file_path -def fetch_spiral_2d(filename = "spiral_2d.npy", dirname = "remote_datasets/spiral_2d"): +def fetch_spiral_2d(filename = "spiral_2d.npy", dirname = None): """ Fetch "spiral_2d.npy" remotely. @@ -94,7 +132,7 @@ def fetch_spiral_2d(filename = "spiral_2d.npy", dirname = "remote_datasets/spira filename : string The name to give to downloaded file. Default is "spiral_2d.npy". dirname : string - The directory to save the file to. Default is "remote_datasets/spiral_2d". + The directory to save the file to. Default is None, meaning that the data home will be set to "~/remote_datasets/spiral_2d". 
Returns ------- @@ -104,20 +142,22 @@ def fetch_spiral_2d(filename = "spiral_2d.npy", dirname = "remote_datasets/spira file_url = "https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/spiral_2d/spiral_2d.npy" file_checksum = '88312ffd6df2e2cb2bde9c0e1f962d7d644c6f58dc369c7b377b298dacdc4eaf' + if dirname is None: + dirname = join(get_data_home(dirname), "spiral_2d") + makedirs(dirname, exist_ok=True) + else: + dirname = get_data_home(dirname) + archive_path = join(dirname, filename) if not exists(archive_path): - # Create directory if not existing - if not exists(dirname): - makedirs(dirname) - file_path_pkl = _fetch_remote(file_url, filename, dirname, file_checksum) return np.load(file_path_pkl, mmap_mode='r') else: return np.load(archive_path, mmap_mode='r') -def fetch_bunny(filename = "bunny.npy", dirname = "remote_datasets/bunny", accept_license = False): +def fetch_bunny(filename = "bunny.npy", dirname = None, accept_license = False): """ Fetch "bunny.npy" remotely and its LICENSE file. @@ -126,7 +166,7 @@ def fetch_bunny(filename = "bunny.npy", dirname = "remote_datasets/bunny", accep filename : string The name to give to downloaded file. Default is "bunny.npy". dirname : string - The directory to save the file to. Default is "remote_datasets/bunny". + The directory to save the file to. Default is None, meaning that the data home will be set to "~/remote_datasets/bunny". accept_license : boolean Flag to specify if user accepts the file LICENSE and prevents from printing the corresponding license terms. Default is False. 
@@ -142,13 +182,15 @@ def fetch_bunny(filename = "bunny.npy", dirname = "remote_datasets/bunny", accep license_url = "https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/bunny/LICENSE" license_checksum = 'b763dbe1b2fc6015d05cbf7bcc686412a2eb100a1f2220296e3b4a644c69633a' + if dirname is None: + dirname = join(get_data_home(dirname), "bunny") + makedirs(dirname, exist_ok=True) + else: + dirname = get_data_home(dirname) + archive_path = join(dirname, filename) if not exists(archive_path): - # Create directory if not existing - if not exists(dirname): - makedirs(dirname) - license_path = _fetch_remote(license_url, "LICENSE", dirname, license_checksum) file_path_pkl = _fetch_remote(file_url, filename, dirname, file_checksum, accept_license) diff --git a/src/python/test/test_remote_datasets.py b/src/python/test/test_remote_datasets.py index 93a8a982..27eb51b0 100644 --- a/src/python/test/test_remote_datasets.py +++ b/src/python/test/test_remote_datasets.py @@ -10,7 +10,7 @@ from gudhi.datasets import remote import re -from os.path import isfile, exists +from os.path import isfile, isdir, expanduser from os import makedirs import io import sys @@ -30,8 +30,7 @@ def _check_dir_file_names(path_file_dw, filename, dirname): assert filename == names_dw[1] def _check_fetch_output(url, filename, dirname = "remote_datasets", file_checksum = None): - if not exists(dirname): - makedirs(dirname) + makedirs(dirname, exist_ok=True) path_file_dw = remote._fetch_remote(url, filename, dirname, file_checksum) _check_dir_file_names(path_file_dw, filename, dirname) @@ -40,8 +39,7 @@ def _get_bunny_license_print(accept_license = False): # Redirect stdout sys.stdout = capturedOutput - if not exists("remote_datasets/bunny"): - makedirs("remote_datasets/bunny") + makedirs("remote_datasets/bunny", exist_ok=True) remote._fetch_remote("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/bunny/bunny.npy", "bunny.npy", "remote_datasets/bunny", 
'13f7842ebb4b45370e50641ff28c88685703efa5faab14edf0bb7d113a965e1b', accept_license) @@ -68,8 +66,7 @@ def test_fetch_remote_datasets(): # Test printing existing LICENSE file when fetching bunny.npy with accept_license = False (default) # Fetch LICENSE file - if not exists("remote_datasets/bunny"): - makedirs("remote_datasets/bunny") + makedirs("remote_datasets/bunny", exist_ok=True) remote._fetch_remote("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/bunny/LICENSE", "LICENSE", "remote_datasets/bunny", 'b763dbe1b2fc6015d05cbf7bcc686412a2eb100a1f2220296e3b4a644c69633a') with open("remote_datasets/bunny/LICENSE") as f: @@ -88,3 +85,23 @@ def test_fetch_remote_datasets(): bunny_arr = remote.fetch_bunny() assert bunny_arr.shape == (35947, 3) + + # Check that default dir was created + assert isdir(expanduser("~/remote_datasets")) == True + + # Test clear_data_home + clear_data_home() + assert isdir(expanduser("~/remote_datasets")) == False + + # Test fetch_spiral_2d and fetch_bunny wrapping functions with data directory different from default + spiral_2d_arr = remote.fetch_spiral_2d(dirname = "~/test") + assert spiral_2d_arr.shape == (114562, 2) + + bunny_arr = remote.fetch_bunny(dirname = "~/test") + assert bunny_arr.shape == (35947, 3) + + assert isdir(expanduser("~/test")) == True + + # Test clear_data_home with data directory different from default + clear_data_home("~/test") + assert isdir(expanduser("~/test")) == False -- cgit v1.2.3 From b0071de9ee7b6b4feb2eb9f19ceb759de21c997f Mon Sep 17 00:00:00 2001 From: Hind-M Date: Fri, 4 Feb 2022 16:09:54 +0100 Subject: Add forgotten module name before func call --- src/python/test/test_remote_datasets.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/python/test/test_remote_datasets.py b/src/python/test/test_remote_datasets.py index 27eb51b0..9532b4ec 100644 --- a/src/python/test/test_remote_datasets.py +++ b/src/python/test/test_remote_datasets.py @@ -90,7 +90,7 @@ def 
test_fetch_remote_datasets(): assert isdir(expanduser("~/remote_datasets")) == True # Test clear_data_home - clear_data_home() + remote.clear_data_home() assert isdir(expanduser("~/remote_datasets")) == False # Test fetch_spiral_2d and fetch_bunny wrapping functions with data directory different from default @@ -103,5 +103,5 @@ def test_fetch_remote_datasets(): assert isdir(expanduser("~/test")) == True # Test clear_data_home with data directory different from default - clear_data_home("~/test") + remote.clear_data_home("~/test") assert isdir(expanduser("~/test")) == False -- cgit v1.2.3 From b5d7d6c2857d305ba2828065310c11edefb37c4e Mon Sep 17 00:00:00 2001 From: Hind-M Date: Mon, 7 Feb 2022 13:03:45 +0100 Subject: Test get_data_home and clear_data_home on a separate folder --- src/python/test/test_remote_datasets.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/python/test/test_remote_datasets.py b/src/python/test/test_remote_datasets.py index 9532b4ec..c160f270 100644 --- a/src/python/test/test_remote_datasets.py +++ b/src/python/test/test_remote_datasets.py @@ -87,21 +87,20 @@ def test_fetch_remote_datasets(): assert bunny_arr.shape == (35947, 3) # Check that default dir was created - assert isdir(expanduser("~/remote_datasets")) == True - - # Test clear_data_home - remote.clear_data_home() - assert isdir(expanduser("~/remote_datasets")) == False + assert isdir(expanduser("~/remote_datasets")) # Test fetch_spiral_2d and fetch_bunny wrapping functions with data directory different from default - spiral_2d_arr = remote.fetch_spiral_2d(dirname = "~/test") + spiral_2d_arr = remote.fetch_spiral_2d(dirname = "~/another_fetch_folder") assert spiral_2d_arr.shape == (114562, 2) - bunny_arr = remote.fetch_bunny(dirname = "~/test") + bunny_arr = remote.fetch_bunny(dirname = "~/another_fetch_folder") assert bunny_arr.shape == (35947, 3) - assert isdir(expanduser("~/test")) == True + assert 
isdir(expanduser("~/another_fetch_folder")) + + # Test get_data_home and clear_data_home on new empty folder + empty_data_home = remote.get_data_home(data_home="empty_folder") + assert isdir(empty_data_home) - # Test clear_data_home with data directory different from default - remote.clear_data_home("~/test") - assert isdir(expanduser("~/test")) == False + remote.clear_data_home(data_home=empty_data_home) + assert not isdir(empty_data_home) -- cgit v1.2.3 From e9b020adf11d48ce7a88932a5fe12cef011e72c9 Mon Sep 17 00:00:00 2001 From: Hind-M Date: Mon, 7 Feb 2022 13:18:57 +0100 Subject: Separate tests into different functions and remove all test folders at the end --- src/python/test/test_remote_datasets.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/src/python/test/test_remote_datasets.py b/src/python/test/test_remote_datasets.py index c160f270..2e595423 100644 --- a/src/python/test/test_remote_datasets.py +++ b/src/python/test/test_remote_datasets.py @@ -7,15 +7,17 @@ # Modification(s): # - YYYY/MM Author: Description of the modification - from gudhi.datasets import remote + import re -from os.path import isfile, isdir, expanduser -from os import makedirs +import shutil import io import sys import pytest +from os.path import isfile, isdir, expanduser +from os import makedirs + def _check_dir_file_names(path_file_dw, filename, dirname): assert isfile(path_file_dw) @@ -76,8 +78,9 @@ def test_fetch_remote_datasets(): assert "" == _get_bunny_license_print(accept_license = True).getvalue() # Remove "remote_datasets" directory and all its content - import shutil shutil.rmtree("remote_datasets") + +def test_fetch_remote_datasets_wrapped(): # Test fetch_spiral_2d and fetch_bunny wrapping functions (twice, to test case of already fetched files) for i in range(2): spiral_2d_arr = remote.fetch_spiral_2d() @@ -98,6 +101,14 @@ def test_fetch_remote_datasets(): assert isdir(expanduser("~/another_fetch_folder")) + # Remove test folders + 
shutil.rmtree(expanduser("~/remote_datasets")) + shutil.rmtree(expanduser("~/another_fetch_folder")) + + assert not isdir(expanduser("~/remote_datasets")) + assert not isdir(expanduser("~/another_fetch_folder")) + +def test_data_home(): # Test get_data_home and clear_data_home on new empty folder empty_data_home = remote.get_data_home(data_home="empty_folder") assert isdir(empty_data_home) -- cgit v1.2.3 From e964ec32247ce02fb12939cfcddaeabc04639869 Mon Sep 17 00:00:00 2001 From: Hind-M Date: Mon, 7 Feb 2022 16:52:55 +0100 Subject: Del used variables before removing folders --- src/python/test/test_remote_datasets.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/python/test/test_remote_datasets.py b/src/python/test/test_remote_datasets.py index 2e595423..cb53cb85 100644 --- a/src/python/test/test_remote_datasets.py +++ b/src/python/test/test_remote_datasets.py @@ -102,6 +102,8 @@ def test_fetch_remote_datasets_wrapped(): assert isdir(expanduser("~/another_fetch_folder")) # Remove test folders + del spiral_2d_arr + del bunny_arr shutil.rmtree(expanduser("~/remote_datasets")) shutil.rmtree(expanduser("~/another_fetch_folder")) -- cgit v1.2.3 From 5c0c731fdd2bc41c2a4833be1612dca5a082c337 Mon Sep 17 00:00:00 2001 From: Hind-M Date: Wed, 2 Mar 2022 10:26:52 +0100 Subject: Modifications following PR review --- src/python/gudhi/datasets/remote.py | 60 ++++++++++++++++++--------------- src/python/test/test_remote_datasets.py | 38 ++++++++++----------- 2 files changed, 51 insertions(+), 47 deletions(-) diff --git a/src/python/gudhi/datasets/remote.py b/src/python/gudhi/datasets/remote.py index 3d6c01b0..618fa80e 100644 --- a/src/python/gudhi/datasets/remote.py +++ b/src/python/gudhi/datasets/remote.py @@ -20,14 +20,14 @@ def get_data_home(data_home = None): """ Return the path of the remote datasets directory. This folder is used to store remotely fetched datasets. 
- By default the datasets directory is set to a folder named 'remote_datasets' in the user home folder. + By default the datasets directory is set to a folder named 'gudhi_data' in the user home folder. Alternatively, it can be set by giving an explicit folder path. The '~' symbol is expanded to the user home folder. If the folder does not already exist, it is automatically created. Parameters ---------- data_home : string - The path to remote datasets directory. Default is `None`, meaning that the data home directory will be set to "~/remote_datasets". + The path to remote datasets directory. Default is `None`, meaning that the data home directory will be set to "~/gudhi_data". Returns ------- @@ -35,7 +35,7 @@ def get_data_home(data_home = None): The path to remote datasets directory. """ if data_home is None: - data_home = join("~", "remote_datasets") + data_home = join("~", "gudhi_data") data_home = expanduser(data_home) makedirs(data_home, exist_ok=True) return data_home @@ -43,12 +43,12 @@ def get_data_home(data_home = None): def clear_data_home(data_home = None): """ - Delete all the content of the data home cache. + Delete the data home cache directory and all its content. Parameters ---------- data_home : string, default is None. - The path to remote datasets directory. If `None`, the default directory to be removed is set to "~/remote_datasets". + The path to remote datasets directory. If `None`, the default directory to be removed is set to "~/gudhi_data". """ data_home = get_data_home(data_home) shutil.rmtree(data_home) @@ -77,7 +77,7 @@ def _checksum_sha256(file_path): sha256_hash.update(buffer) return sha256_hash.hexdigest() -def _fetch_remote(url, filename, dirname = "remote_datasets", file_checksum = None, accept_license = False): +def _fetch_remote(url, filename, dirname = "gudhi_data", file_checksum = None, accept_license = False): """ Fetch the wanted dataset from the given url and save it in file_path. 
@@ -88,10 +88,10 @@ def _fetch_remote(url, filename, dirname = "remote_datasets", file_checksum = No filename : string The name to give to downloaded file. dirname : string - The directory to save the file to. Default is "remote_datasets". + The directory to save the file to. Default is "gudhi_data". file_checksum : string The file checksum using sha256 to check against the one computed on the downloaded file. - Default is 'None'. + Default is 'None', which means the checksum is not checked. accept_license : boolean Flag to specify if user accepts the file LICENSE and prevents from printing the corresponding license terms. Default is False. @@ -100,6 +100,11 @@ def _fetch_remote(url, filename, dirname = "remote_datasets", file_checksum = No ------- file_path: string Full path of the created file. + + Raises + ------ + IOError + If the computed SHA256 checksum of file does not match the one given by the user. """ file_path = join(dirname, filename) @@ -123,32 +128,37 @@ def _fetch_remote(url, filename, dirname = "remote_datasets", file_checksum = No return file_path +def _get_archive_and_dir(dirname, filename, label): + if dirname is None: + dirname = join(get_data_home(dirname), label) + makedirs(dirname, exist_ok=True) + else: + dirname = get_data_home(dirname) + + archive_path = join(dirname, filename) + + return archive_path, dirname + def fetch_spiral_2d(filename = "spiral_2d.npy", dirname = None): """ - Fetch "spiral_2d.npy" remotely. + Fetch spiral_2d dataset remotely. Parameters ---------- filename : string The name to give to downloaded file. Default is "spiral_2d.npy". dirname : string - The directory to save the file to. Default is None, meaning that the data home will be set to "~/remote_datasets/spiral_2d". + The directory to save the file to. Default is None, meaning that the data home will be set to "~/gudhi_data/spiral_2d". Returns ------- points: array - Array of points stored in "spiral_2d.npy". + Array of points. 
""" file_url = "https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/spiral_2d/spiral_2d.npy" file_checksum = '88312ffd6df2e2cb2bde9c0e1f962d7d644c6f58dc369c7b377b298dacdc4eaf' - if dirname is None: - dirname = join(get_data_home(dirname), "spiral_2d") - makedirs(dirname, exist_ok=True) - else: - dirname = get_data_home(dirname) - - archive_path = join(dirname, filename) + archive_path, dirname = _get_archive_and_dir(dirname, filename, "spiral_2d") if not exists(archive_path): file_path_pkl = _fetch_remote(file_url, filename, dirname, file_checksum) @@ -159,14 +169,14 @@ def fetch_spiral_2d(filename = "spiral_2d.npy", dirname = None): def fetch_bunny(filename = "bunny.npy", dirname = None, accept_license = False): """ - Fetch "bunny.npy" remotely and its LICENSE file. + Fetch Stanford bunny dataset remotely and its LICENSE file. Parameters ---------- filename : string The name to give to downloaded file. Default is "bunny.npy". dirname : string - The directory to save the file to. Default is None, meaning that the data home will be set to "~/remote_datasets/bunny". + The directory to save the file to. Default is None, meaning that the data home will be set to "~/gudhi_data/bunny". accept_license : boolean Flag to specify if user accepts the file LICENSE and prevents from printing the corresponding license terms. Default is False. @@ -174,7 +184,7 @@ def fetch_bunny(filename = "bunny.npy", dirname = None, accept_license = False): Returns ------- points: array - Array of points stored in "bunny.npy". + Array of points. 
""" file_url = "https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/bunny/bunny.npy" @@ -182,13 +192,7 @@ def fetch_bunny(filename = "bunny.npy", dirname = None, accept_license = False): license_url = "https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/bunny/LICENSE" license_checksum = 'b763dbe1b2fc6015d05cbf7bcc686412a2eb100a1f2220296e3b4a644c69633a' - if dirname is None: - dirname = join(get_data_home(dirname), "bunny") - makedirs(dirname, exist_ok=True) - else: - dirname = get_data_home(dirname) - - archive_path = join(dirname, filename) + archive_path, dirname = _get_archive_and_dir(dirname, filename, "bunny") if not exists(archive_path): license_path = _fetch_remote(license_url, "LICENSE", dirname, license_checksum) diff --git a/src/python/test/test_remote_datasets.py b/src/python/test/test_remote_datasets.py index cb53cb85..c44ac22b 100644 --- a/src/python/test/test_remote_datasets.py +++ b/src/python/test/test_remote_datasets.py @@ -22,7 +22,7 @@ def _check_dir_file_names(path_file_dw, filename, dirname): assert isfile(path_file_dw) names_dw = re.split(r' |/|\\', path_file_dw) - # Case where inner directories are created in "remote_datasets/"; e.g: "remote_datasets/bunny" + # Case where inner directories are created in "test_gudhi_data/"; e.g: "test_gudhi_data/bunny" if len(names_dw) >= 3: for i in range(len(names_dw)-1): assert re.split(r' |/|\\', dirname)[i] == names_dw[i] @@ -31,7 +31,7 @@ def _check_dir_file_names(path_file_dw, filename, dirname): assert dirname == names_dw[0] assert filename == names_dw[1] -def _check_fetch_output(url, filename, dirname = "remote_datasets", file_checksum = None): +def _check_fetch_output(url, filename, dirname = "test_gudhi_data", file_checksum = None): makedirs(dirname, exist_ok=True) path_file_dw = remote._fetch_remote(url, filename, dirname, file_checksum) _check_dir_file_names(path_file_dw, filename, dirname) @@ -41,9 +41,9 @@ def _get_bunny_license_print(accept_license = False): # Redirect 
stdout sys.stdout = capturedOutput - makedirs("remote_datasets/bunny", exist_ok=True) + makedirs("test_gudhi_data/bunny", exist_ok=True) - remote._fetch_remote("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/bunny/bunny.npy", "bunny.npy", "remote_datasets/bunny", + remote._fetch_remote("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/bunny/bunny.npy", "bunny.npy", "test_gudhi_data/bunny", '13f7842ebb4b45370e50641ff28c88685703efa5faab14edf0bb7d113a965e1b', accept_license) # Reset redirect sys.stdout = sys.__stdout__ @@ -68,19 +68,21 @@ def test_fetch_remote_datasets(): # Test printing existing LICENSE file when fetching bunny.npy with accept_license = False (default) # Fetch LICENSE file - makedirs("remote_datasets/bunny", exist_ok=True) - remote._fetch_remote("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/bunny/LICENSE", "LICENSE", "remote_datasets/bunny", + makedirs("test_gudhi_data/bunny", exist_ok=True) + remote._fetch_remote("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/bunny/LICENSE", "LICENSE", "test_gudhi_data/bunny", 'b763dbe1b2fc6015d05cbf7bcc686412a2eb100a1f2220296e3b4a644c69633a') - with open("remote_datasets/bunny/LICENSE") as f: + with open("test_gudhi_data/bunny/LICENSE") as f: assert f.read().rstrip("\n") == _get_bunny_license_print().getvalue().rstrip("\n") # Test not printing bunny.npy LICENSE when accept_license = True assert "" == _get_bunny_license_print(accept_license = True).getvalue() - # Remove "remote_datasets" directory and all its content - shutil.rmtree("remote_datasets") + # Remove "test_gudhi_data" directory and all its content + shutil.rmtree("test_gudhi_data") def test_fetch_remote_datasets_wrapped(): + # Check if gudhi_data default dir exists already + to_be_removed = not isdir(expanduser("~/gudhi_data")) # Test fetch_spiral_2d and fetch_bunny wrapping functions (twice, to test case of already fetched files) for i in range(2): spiral_2d_arr = remote.fetch_spiral_2d() 
@@ -90,29 +92,27 @@ def test_fetch_remote_datasets_wrapped(): assert bunny_arr.shape == (35947, 3) # Check that default dir was created - assert isdir(expanduser("~/remote_datasets")) + assert isdir(expanduser("~/gudhi_data")) # Test fetch_spiral_2d and fetch_bunny wrapping functions with data directory different from default - spiral_2d_arr = remote.fetch_spiral_2d(dirname = "~/another_fetch_folder") + spiral_2d_arr = remote.fetch_spiral_2d(dirname = "./another_fetch_folder_for_test") assert spiral_2d_arr.shape == (114562, 2) - bunny_arr = remote.fetch_bunny(dirname = "~/another_fetch_folder") + bunny_arr = remote.fetch_bunny(dirname = "./another_fetch_folder_for_test") assert bunny_arr.shape == (35947, 3) - assert isdir(expanduser("~/another_fetch_folder")) + assert isdir(expanduser("./another_fetch_folder_for_test")) # Remove test folders del spiral_2d_arr del bunny_arr - shutil.rmtree(expanduser("~/remote_datasets")) - shutil.rmtree(expanduser("~/another_fetch_folder")) - - assert not isdir(expanduser("~/remote_datasets")) - assert not isdir(expanduser("~/another_fetch_folder")) + if to_be_removed: + shutil.rmtree(expanduser("~/gudhi_data")) + shutil.rmtree(expanduser("./another_fetch_folder_for_test")) def test_data_home(): # Test get_data_home and clear_data_home on new empty folder - empty_data_home = remote.get_data_home(data_home="empty_folder") + empty_data_home = remote.get_data_home(data_home="empty_folder_for_test") assert isdir(empty_data_home) remote.clear_data_home(data_home=empty_data_home) -- cgit v1.2.3 From 58e2f677081b4e9f21c47d6286b329218aa825d6 Mon Sep 17 00:00:00 2001 From: Hind-M Date: Wed, 2 Mar 2022 17:58:39 +0100 Subject: Remove file when given checksum does not match Add more details to doc Remove default dirname value in _fetch_remote Add points/ subfolder in fetching functions --- src/python/gudhi/datasets/remote.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git 
a/src/python/gudhi/datasets/remote.py b/src/python/gudhi/datasets/remote.py index 618fa80e..8b3baef4 100644 --- a/src/python/gudhi/datasets/remote.py +++ b/src/python/gudhi/datasets/remote.py @@ -8,7 +8,7 @@ # - YYYY/MM Author: Description of the modification from os.path import join, exists, expanduser -from os import makedirs +from os import makedirs, remove from urllib.request import urlretrieve import hashlib @@ -77,7 +77,7 @@ def _checksum_sha256(file_path): sha256_hash.update(buffer) return sha256_hash.hexdigest() -def _fetch_remote(url, filename, dirname = "gudhi_data", file_checksum = None, accept_license = False): +def _fetch_remote(url, filename, dirname, file_checksum = None, accept_license = False): """ Fetch the wanted dataset from the given url and save it in file_path. @@ -88,7 +88,7 @@ def _fetch_remote(url, filename, dirname = "gudhi_data", file_checksum = None, a filename : string The name to give to downloaded file. dirname : string - The directory to save the file to. Default is "gudhi_data". + The directory to save the file to. file_checksum : string The file checksum using sha256 to check against the one computed on the downloaded file. Default is 'None', which means the checksum is not checked. @@ -115,6 +115,8 @@ def _fetch_remote(url, filename, dirname = "gudhi_data", file_checksum = None, a if file_checksum is not None: checksum = _checksum_sha256(file_path) if file_checksum != checksum: + # Remove file and raise error + remove(file_path) raise IOError("{} has a SHA256 checksum : {}, " "different from expected : {}." "The file may be corrupted or the given url may be wrong !".format(file_path, checksum, file_checksum)) @@ -148,17 +150,17 @@ def fetch_spiral_2d(filename = "spiral_2d.npy", dirname = None): filename : string The name to give to downloaded file. Default is "spiral_2d.npy". dirname : string - The directory to save the file to. Default is None, meaning that the data home will be set to "~/gudhi_data/spiral_2d". 
+ The directory to save the file to. Default is None, meaning that the downloaded file will be put in "~/gudhi_data/points/spiral_2d". Returns ------- - points: array - Array of points. + points: numpy array + Array of shape (114562, 2). """ file_url = "https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/spiral_2d/spiral_2d.npy" file_checksum = '88312ffd6df2e2cb2bde9c0e1f962d7d644c6f58dc369c7b377b298dacdc4eaf' - archive_path, dirname = _get_archive_and_dir(dirname, filename, "spiral_2d") + archive_path, dirname = _get_archive_and_dir(dirname, filename, "points/spiral_2d") if not exists(archive_path): file_path_pkl = _fetch_remote(file_url, filename, dirname, file_checksum) @@ -170,21 +172,22 @@ def fetch_spiral_2d(filename = "spiral_2d.npy", dirname = None): def fetch_bunny(filename = "bunny.npy", dirname = None, accept_license = False): """ Fetch Stanford bunny dataset remotely and its LICENSE file. + This dataset contains 35947 vertices. Parameters ---------- filename : string The name to give to downloaded file. Default is "bunny.npy". dirname : string - The directory to save the file to. Default is None, meaning that the data home will be set to "~/gudhi_data/bunny". + The directory to save the file to. Default is None, meaning that the downloaded files will be put in "~/gudhi_data/points/bunny". accept_license : boolean Flag to specify if user accepts the file LICENSE and prevents from printing the corresponding license terms. Default is False. Returns ------- - points: array - Array of points. + points: numpy array + Array of shape (35947, 3). 
""" file_url = "https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/bunny/bunny.npy" @@ -192,7 +195,7 @@ def fetch_bunny(filename = "bunny.npy", dirname = None, accept_license = False): license_url = "https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/bunny/LICENSE" license_checksum = 'b763dbe1b2fc6015d05cbf7bcc686412a2eb100a1f2220296e3b4a644c69633a' - archive_path, dirname = _get_archive_and_dir(dirname, filename, "bunny") + archive_path, dirname = _get_archive_and_dir(dirname, filename, "points/bunny") if not exists(archive_path): license_path = _fetch_remote(license_url, "LICENSE", dirname, license_checksum) -- cgit v1.2.3 From 0047eaacaffef2b3da6207123da3ef3d919c0b27 Mon Sep 17 00:00:00 2001 From: Hind-M Date: Wed, 9 Mar 2022 15:56:23 +0100 Subject: Add bunny image to the datasets doc --- src/python/doc/datasets.rst | 6 ++++++ src/python/doc/img/bunny.png | Bin 0 -> 48040 bytes 2 files changed, 6 insertions(+) create mode 100644 src/python/doc/img/bunny.png diff --git a/src/python/doc/datasets.rst b/src/python/doc/datasets.rst index 4fa8a628..62b7dca0 100644 --- a/src/python/doc/datasets.rst +++ b/src/python/doc/datasets.rst @@ -112,6 +112,12 @@ Fetching datasets We provide some ready-to-use datasets that are not available by default when getting GUDHI, and need to be fetched explicitly. +.. figure:: ./img/bunny.png + :figclass: align-center + + 3D Stanford bunny with 35947 vertices. + + .. 
automodule:: gudhi.datasets.remote :members: :special-members: diff --git a/src/python/doc/img/bunny.png b/src/python/doc/img/bunny.png new file mode 100644 index 00000000..769aa530 Binary files /dev/null and b/src/python/doc/img/bunny.png differ -- cgit v1.2.3 From ef8284cce27a8f11947e7f076034aa2fd8b5a395 Mon Sep 17 00:00:00 2001 From: Hind-M Date: Wed, 4 May 2022 15:27:34 +0200 Subject: Ask for file_path as parameter of remote fetching functions instead of both dirname and filename Modify remote fetching test --- src/python/gudhi/datasets/remote.py | 106 +++++++++++++++----------------- src/python/test/test_remote_datasets.py | 94 ++++++++++------------------ 2 files changed, 83 insertions(+), 117 deletions(-) diff --git a/src/python/gudhi/datasets/remote.py b/src/python/gudhi/datasets/remote.py index 8b3baef4..5b535911 100644 --- a/src/python/gudhi/datasets/remote.py +++ b/src/python/gudhi/datasets/remote.py @@ -7,7 +7,7 @@ # Modification(s): # - YYYY/MM Author: Description of the modification -from os.path import join, exists, expanduser +from os.path import join, split, exists, expanduser from os import makedirs, remove from urllib.request import urlretrieve @@ -60,7 +60,7 @@ def _checksum_sha256(file_path): Parameters ---------- file_path: string - Full path of the created file. + Full path of the created file including filename. Returns ------- @@ -77,7 +77,7 @@ def _checksum_sha256(file_path): sha256_hash.update(buffer) return sha256_hash.hexdigest() -def _fetch_remote(url, filename, dirname, file_checksum = None, accept_license = False): +def _fetch_remote(url, file_path, file_checksum = None): """ Fetch the wanted dataset from the given url and save it in file_path. @@ -85,21 +85,11 @@ def _fetch_remote(url, filename, dirname, file_checksum = None, accept_license = ---------- url : string The url to fetch the dataset from. - filename : string - The name to give to downloaded file. - dirname : string - The directory to save the file to. 
+ file_path : string + Full path of the downloaded file including filename. file_checksum : string The file checksum using sha256 to check against the one computed on the downloaded file. Default is 'None', which means the checksum is not checked. - accept_license : boolean - Flag to specify if user accepts the file LICENSE and prevents from printing the corresponding license terms. - Default is False. - - Returns - ------- - file_path: string - Full path of the created file. Raises ------ @@ -107,8 +97,6 @@ def _fetch_remote(url, filename, dirname, file_checksum = None, accept_license = If the computed SHA256 checksum of file does not match the one given by the user. """ - file_path = join(dirname, filename) - # Get the file urlretrieve(url, file_path) @@ -121,36 +109,41 @@ def _fetch_remote(url, filename, dirname, file_checksum = None, accept_license = "different from expected : {}." "The file may be corrupted or the given url may be wrong !".format(file_path, checksum, file_checksum)) - # Print license terms unless accept_license is set to True - if not accept_license: - license_file = join(dirname, "LICENSE") - if exists(license_file) and (file_path != license_file): - with open(license_file, 'r') as f: - print(f.read()) +def _get_archive_path(file_path, label): + """ + Get archive path based on file_path given by user and label. - return file_path + Parameters + ---------- + file_path: string + Full path of the file to get including filename, or None. + label: string + Label used along with 'data_home' to get archive path, in case 'file_path' is None. -def _get_archive_and_dir(dirname, filename, label): - if dirname is None: - dirname = join(get_data_home(dirname), label) + Returns + ------- + Full path of archive including filename. 
+ """ + if file_path is None: + archive_path = join(get_data_home(), label) + dirname = split(archive_path)[0] makedirs(dirname, exist_ok=True) else: - dirname = get_data_home(dirname) - - archive_path = join(dirname, filename) + archive_path = file_path + dirname = split(archive_path)[0] + makedirs(dirname, exist_ok=True) - return archive_path, dirname + return archive_path -def fetch_spiral_2d(filename = "spiral_2d.npy", dirname = None): +def fetch_spiral_2d(file_path = None): """ Fetch spiral_2d dataset remotely. Parameters ---------- - filename : string - The name to give to downloaded file. Default is "spiral_2d.npy". - dirname : string - The directory to save the file to. Default is None, meaning that the downloaded file will be put in "~/gudhi_data/points/spiral_2d". + file_path : string + Full path of the downloaded file including filename. + Default is None, meaning that it's set to "data_home/points/spiral_2d/spiral_2d.npy". Returns ------- @@ -158,28 +151,25 @@ def fetch_spiral_2d(filename = "spiral_2d.npy", dirname = None): Array of shape (114562, 2). 
""" file_url = "https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/spiral_2d/spiral_2d.npy" - file_checksum = '88312ffd6df2e2cb2bde9c0e1f962d7d644c6f58dc369c7b377b298dacdc4eaf' + file_checksum = '2226024da76c073dd2f24b884baefbfd14928b52296df41ad2d9b9dc170f2401' - archive_path, dirname = _get_archive_and_dir(dirname, filename, "points/spiral_2d") + archive_path = _get_archive_path(file_path, "points/spiral_2d/spiral_2d.npy") if not exists(archive_path): - file_path_pkl = _fetch_remote(file_url, filename, dirname, file_checksum) + _fetch_remote(file_url, archive_path, file_checksum) - return np.load(file_path_pkl, mmap_mode='r') - else: - return np.load(archive_path, mmap_mode='r') + return np.load(archive_path, mmap_mode='r') -def fetch_bunny(filename = "bunny.npy", dirname = None, accept_license = False): +def fetch_bunny(file_path = None, accept_license = False): """ Fetch Stanford bunny dataset remotely and its LICENSE file. This dataset contains 35947 vertices. Parameters ---------- - filename : string - The name to give to downloaded file. Default is "bunny.npy". - dirname : string - The directory to save the file to. Default is None, meaning that the downloaded files will be put in "~/gudhi_data/points/bunny". + file_path : string + Full path of the downloaded file including filename. + Default is None, meaning that it's set to "data_home/points/bunny/bunny.npy". accept_license : boolean Flag to specify if user accepts the file LICENSE and prevents from printing the corresponding license terms. Default is False. 
@@ -191,16 +181,20 @@ def fetch_bunny(filename = "bunny.npy", dirname = None, accept_license = False): """ file_url = "https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/bunny/bunny.npy" - file_checksum = '13f7842ebb4b45370e50641ff28c88685703efa5faab14edf0bb7d113a965e1b' - license_url = "https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/bunny/LICENSE" + file_checksum = 'f382482fd89df8d6444152dc8fd454444fe597581b193fd139725a85af4a6c6e' + license_url = "https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/bunny/bunny.LICENSE" license_checksum = 'b763dbe1b2fc6015d05cbf7bcc686412a2eb100a1f2220296e3b4a644c69633a' - archive_path, dirname = _get_archive_and_dir(dirname, filename, "points/bunny") + archive_path = _get_archive_path(file_path, "points/bunny/bunny.npy") if not exists(archive_path): - license_path = _fetch_remote(license_url, "LICENSE", dirname, license_checksum) - file_path_pkl = _fetch_remote(file_url, filename, dirname, file_checksum, accept_license) - - return np.load(file_path_pkl, mmap_mode='r') - else: - return np.load(archive_path, mmap_mode='r') + _fetch_remote(file_url, archive_path, file_checksum) + license_path = join(split(archive_path)[0], "bunny.LICENSE") + _fetch_remote(license_url, license_path, license_checksum) + # Print license terms unless accept_license is set to True + if not accept_license: + if exists(license_path): + with open(license_path, 'r') as f: + print(f.read()) + + return np.load(archive_path, mmap_mode='r') diff --git a/src/python/test/test_remote_datasets.py b/src/python/test/test_remote_datasets.py index c44ac22b..5d0d397d 100644 --- a/src/python/test/test_remote_datasets.py +++ b/src/python/test/test_remote_datasets.py @@ -9,76 +9,48 @@ from gudhi.datasets import remote -import re import shutil import io import sys import pytest -from os.path import isfile, isdir, expanduser -from os import makedirs +from os.path import isdir, expanduser, exists +from os import remove -def 
_check_dir_file_names(path_file_dw, filename, dirname): - assert isfile(path_file_dw) +def test_data_home(): + # Test get_data_home and clear_data_home on new empty folder + empty_data_home = remote.get_data_home(data_home="empty_folder_for_test") + assert isdir(empty_data_home) - names_dw = re.split(r' |/|\\', path_file_dw) - # Case where inner directories are created in "test_gudhi_data/"; e.g: "test_gudhi_data/bunny" - if len(names_dw) >= 3: - for i in range(len(names_dw)-1): - assert re.split(r' |/|\\', dirname)[i] == names_dw[i] - assert filename == names_dw[i+1] - else: - assert dirname == names_dw[0] - assert filename == names_dw[1] + remote.clear_data_home(data_home=empty_data_home) + assert not isdir(empty_data_home) -def _check_fetch_output(url, filename, dirname = "test_gudhi_data", file_checksum = None): - makedirs(dirname, exist_ok=True) - path_file_dw = remote._fetch_remote(url, filename, dirname, file_checksum) - _check_dir_file_names(path_file_dw, filename, dirname) +def test_fetch_remote(): + # Test fetch with a wrong checksum + with pytest.raises(OSError): + remote._fetch_remote("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/spiral_2d/spiral_2d.npy", "tmp_spiral_2d.npy", file_checksum = 'XXXXXXXXXX') + assert not exists("tmp_spiral_2d.npy") def _get_bunny_license_print(accept_license = False): capturedOutput = io.StringIO() # Redirect stdout sys.stdout = capturedOutput - makedirs("test_gudhi_data/bunny", exist_ok=True) + bunny_arr = remote.fetch_bunny("./tmp_for_test/bunny.npy", accept_license) + assert bunny_arr.shape == (35947, 3) + remove("./tmp_for_test/bunny.npy") - remote._fetch_remote("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/bunny/bunny.npy", "bunny.npy", "test_gudhi_data/bunny", - '13f7842ebb4b45370e50641ff28c88685703efa5faab14edf0bb7d113a965e1b', accept_license) # Reset redirect sys.stdout = sys.__stdout__ return capturedOutput -def test_fetch_remote_datasets(): - # Test fetch with a wrong checksum 
- with pytest.raises(OSError): - _check_fetch_output("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/spiral_2d/spiral_2d.npy", "spiral_2d.npy", file_checksum = 'XXXXXXXXXX') - - # Test files download from given urls with checksums provided - _check_fetch_output("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/spiral_2d/spiral_2d.npy", "spiral_2d.npy", - file_checksum = '88312ffd6df2e2cb2bde9c0e1f962d7d644c6f58dc369c7b377b298dacdc4eaf') - - _check_fetch_output("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/sphere3D_pts_on_grid.off", "sphere3D_pts_on_grid.off", - file_checksum = '32f96d2cafb1177f0dd5e0a019b6ff5658e14a619a7815ae55ad0fc5e8bd3f88') - - # Test files download from given urls without checksums - _check_fetch_output("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/spiral_2d/spiral_2d.npy", "spiral_2d.npy") - - _check_fetch_output("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/sphere3D_pts_on_grid.off", "sphere3D_pts_on_grid.off") - - # Test printing existing LICENSE file when fetching bunny.npy with accept_license = False (default) - # Fetch LICENSE file - makedirs("test_gudhi_data/bunny", exist_ok=True) - remote._fetch_remote("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/bunny/LICENSE", "LICENSE", "test_gudhi_data/bunny", - 'b763dbe1b2fc6015d05cbf7bcc686412a2eb100a1f2220296e3b4a644c69633a') - with open("test_gudhi_data/bunny/LICENSE") as f: - assert f.read().rstrip("\n") == _get_bunny_license_print().getvalue().rstrip("\n") - +def test_print_bunny_license(): # Test not printing bunny.npy LICENSE when accept_license = True assert "" == _get_bunny_license_print(accept_license = True).getvalue() - - # Remove "test_gudhi_data" directory and all its content - shutil.rmtree("test_gudhi_data") + # Test printing bunny.LICENSE file when fetching bunny.npy with accept_license = False (default) + with open("./tmp_for_test/bunny.LICENSE") as f: + assert 
f.read().rstrip("\n") == _get_bunny_license_print().getvalue().rstrip("\n") + shutil.rmtree("./tmp_for_test") def test_fetch_remote_datasets_wrapped(): # Check if gudhi_data default dir exists already @@ -93,27 +65,27 @@ def test_fetch_remote_datasets_wrapped(): # Check that default dir was created assert isdir(expanduser("~/gudhi_data")) + # Check downloaded files + assert exists(expanduser("~/gudhi_data/points/spiral_2d/spiral_2d.npy")) + assert exists(expanduser("~/gudhi_data/points/bunny/bunny.npy")) + assert exists(expanduser("~/gudhi_data/points/bunny/bunny.LICENSE")) # Test fetch_spiral_2d and fetch_bunny wrapping functions with data directory different from default - spiral_2d_arr = remote.fetch_spiral_2d(dirname = "./another_fetch_folder_for_test") + spiral_2d_arr = remote.fetch_spiral_2d("./another_fetch_folder_for_test/spiral_2d.npy") assert spiral_2d_arr.shape == (114562, 2) - bunny_arr = remote.fetch_bunny(dirname = "./another_fetch_folder_for_test") + bunny_arr = remote.fetch_bunny("./another_fetch_folder_for_test/bunny.npy") assert bunny_arr.shape == (35947, 3) - assert isdir(expanduser("./another_fetch_folder_for_test")) + assert isdir("./another_fetch_folder_for_test") + # Check downloaded files + assert exists("./another_fetch_folder_for_test/spiral_2d.npy") + assert exists("./another_fetch_folder_for_test/bunny.npy") + assert exists("./another_fetch_folder_for_test/bunny.LICENSE") # Remove test folders del spiral_2d_arr del bunny_arr if to_be_removed: shutil.rmtree(expanduser("~/gudhi_data")) - shutil.rmtree(expanduser("./another_fetch_folder_for_test")) - -def test_data_home(): - # Test get_data_home and clear_data_home on new empty folder - empty_data_home = remote.get_data_home(data_home="empty_folder_for_test") - assert isdir(empty_data_home) - - remote.clear_data_home(data_home=empty_data_home) - assert not isdir(empty_data_home) + shutil.rmtree("./another_fetch_folder_for_test") -- cgit v1.2.3 From 52d5b524403a43bfdc0b27a7feeec04e9c9c34c2 
Mon Sep 17 00:00:00 2001 From: Hind-M Date: Thu, 5 May 2022 17:43:12 +0200 Subject: Add GUDHI_DATA environment variable option --- src/python/gudhi/datasets/remote.py | 16 +++++++++++----- src/python/test/test_remote_datasets.py | 13 ++++++++++++- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/src/python/gudhi/datasets/remote.py b/src/python/gudhi/datasets/remote.py index 5b535911..eac8caf3 100644 --- a/src/python/gudhi/datasets/remote.py +++ b/src/python/gudhi/datasets/remote.py @@ -8,7 +8,7 @@ # - YYYY/MM Author: Description of the modification from os.path import join, split, exists, expanduser -from os import makedirs, remove +from os import makedirs, remove, environ from urllib.request import urlretrieve import hashlib @@ -21,13 +21,16 @@ def get_data_home(data_home = None): Return the path of the remote datasets directory. This folder is used to store remotely fetched datasets. By default the datasets directory is set to a folder named 'gudhi_data' in the user home folder. - Alternatively, it can be set by giving an explicit folder path. The '~' symbol is expanded to the user home folder. + Alternatively, it can be set by the 'GUDHI_DATA' environment variable. + The '~' symbol is expanded to the user home folder. If the folder does not already exist, it is automatically created. Parameters ---------- data_home : string - The path to remote datasets directory. Default is `None`, meaning that the data home directory will be set to "~/gudhi_data". + The path to remote datasets directory. + Default is `None`, meaning that the data home directory will be set to "~/gudhi_data", + if the 'GUDHI_DATA' environment variable does not exist. Returns ------- @@ -35,7 +38,7 @@ def get_data_home(data_home = None): The path to remote datasets directory. 
""" if data_home is None: - data_home = join("~", "gudhi_data") + data_home = environ.get("GUDHI_DATA", join("~", "gudhi_data")) data_home = expanduser(data_home) makedirs(data_home, exist_ok=True) return data_home @@ -48,7 +51,9 @@ def clear_data_home(data_home = None): Parameters ---------- data_home : string, default is None. - The path to remote datasets directory. If `None`, the default directory to be removed is set to "~/gudhi_data". + The path to remote datasets directory. + If `None` and the 'GUDHI_DATA' environment variable does not exist, + the default directory to be removed is set to "~/gudhi_data". """ data_home = get_data_home(data_home) shutil.rmtree(data_home) @@ -170,6 +175,7 @@ def fetch_bunny(file_path = None, accept_license = False): file_path : string Full path of the downloaded file including filename. Default is None, meaning that it's set to "data_home/points/bunny/bunny.npy". + In this case, the LICENSE file would be downloaded as "data_home/points/bunny/bunny.LICENSE". accept_license : boolean Flag to specify if user accepts the file LICENSE and prevents from printing the corresponding license terms. Default is False. 
diff --git a/src/python/test/test_remote_datasets.py b/src/python/test/test_remote_datasets.py index 5d0d397d..af26d77c 100644 --- a/src/python/test/test_remote_datasets.py +++ b/src/python/test/test_remote_datasets.py @@ -15,7 +15,7 @@ import sys import pytest from os.path import isdir, expanduser, exists -from os import remove +from os import remove, environ def test_data_home(): # Test get_data_home and clear_data_home on new empty folder @@ -89,3 +89,14 @@ def test_fetch_remote_datasets_wrapped(): if to_be_removed: shutil.rmtree(expanduser("~/gudhi_data")) shutil.rmtree("./another_fetch_folder_for_test") + +def test_gudhi_data_env(): + # Set environment variable "GUDHI_DATA" + environ["GUDHI_DATA"] = "./test_folder_from_env_var" + bunny_arr = remote.fetch_bunny() + assert bunny_arr.shape == (35947, 3) + assert exists("./test_folder_from_env_var/points/bunny/bunny.npy") + assert exists("./test_folder_from_env_var/points/bunny/bunny.LICENSE") + # Remove test folder + del bunny_arr + shutil.rmtree("./test_folder_from_env_var") -- cgit v1.2.3 From f344700ebee65de9ccc8799f2ec4e1c633ab864e Mon Sep 17 00:00:00 2001 From: Hind-M Date: Thu, 5 May 2022 18:07:52 +0200 Subject: Remove default data home test (because of 'GUDHI_DATA' environment variable option) --- src/python/test/test_remote_datasets.py | 26 +++++--------------------- 1 file changed, 5 insertions(+), 21 deletions(-) diff --git a/src/python/test/test_remote_datasets.py b/src/python/test/test_remote_datasets.py index af26d77c..6f569fd2 100644 --- a/src/python/test/test_remote_datasets.py +++ b/src/python/test/test_remote_datasets.py @@ -53,30 +53,16 @@ def test_print_bunny_license(): shutil.rmtree("./tmp_for_test") def test_fetch_remote_datasets_wrapped(): - # Check if gudhi_data default dir exists already - to_be_removed = not isdir(expanduser("~/gudhi_data")) - # Test fetch_spiral_2d and fetch_bunny wrapping functions (twice, to test case of already fetched files) + # Test fetch_spiral_2d and fetch_bunny 
wrapping functions with data directory different from default (twice, to test case of already fetched files) + # Default case is not tested because it would fail in case the user sets the 'GUDHI_DATA' environment variable locally for i in range(2): - spiral_2d_arr = remote.fetch_spiral_2d() + spiral_2d_arr = remote.fetch_spiral_2d("./another_fetch_folder_for_test/spiral_2d.npy") assert spiral_2d_arr.shape == (114562, 2) - bunny_arr = remote.fetch_bunny() + bunny_arr = remote.fetch_bunny("./another_fetch_folder_for_test/bunny.npy") assert bunny_arr.shape == (35947, 3) - # Check that default dir was created - assert isdir(expanduser("~/gudhi_data")) - # Check downloaded files - assert exists(expanduser("~/gudhi_data/points/spiral_2d/spiral_2d.npy")) - assert exists(expanduser("~/gudhi_data/points/bunny/bunny.npy")) - assert exists(expanduser("~/gudhi_data/points/bunny/bunny.LICENSE")) - - # Test fetch_spiral_2d and fetch_bunny wrapping functions with data directory different from default - spiral_2d_arr = remote.fetch_spiral_2d("./another_fetch_folder_for_test/spiral_2d.npy") - assert spiral_2d_arr.shape == (114562, 2) - - bunny_arr = remote.fetch_bunny("./another_fetch_folder_for_test/bunny.npy") - assert bunny_arr.shape == (35947, 3) - + # Check that the directory was created assert isdir("./another_fetch_folder_for_test") # Check downloaded files assert exists("./another_fetch_folder_for_test/spiral_2d.npy") @@ -86,8 +72,6 @@ def test_fetch_remote_datasets_wrapped(): # Remove test folders del spiral_2d_arr del bunny_arr - if to_be_removed: - shutil.rmtree(expanduser("~/gudhi_data")) shutil.rmtree("./another_fetch_folder_for_test") def test_gudhi_data_env(): -- cgit v1.2.3 From a809771b6d7381d233656f7a0b02211559189bfe Mon Sep 17 00:00:00 2001 From: Hind-M Date: Fri, 6 May 2022 09:52:26 +0200 Subject: Delete bunny array before removing the file --- src/python/test/test_remote_datasets.py | 1 + 1 file changed, 1 insertion(+) diff --git 
a/src/python/test/test_remote_datasets.py b/src/python/test/test_remote_datasets.py index 6f569fd2..cde9fa22 100644 --- a/src/python/test/test_remote_datasets.py +++ b/src/python/test/test_remote_datasets.py @@ -38,6 +38,7 @@ def _get_bunny_license_print(accept_license = False): bunny_arr = remote.fetch_bunny("./tmp_for_test/bunny.npy", accept_license) assert bunny_arr.shape == (35947, 3) + del bunny_arr remove("./tmp_for_test/bunny.npy") # Reset redirect -- cgit v1.2.3 From dcd4204d62a4c9a4f3d9ebc61341fba25ae19687 Mon Sep 17 00:00:00 2001 From: Hind-M Date: Tue, 24 May 2022 11:44:49 +0200 Subject: Use autofunction instead of automodule in doc and add 2d spiral image --- src/python/doc/datasets.rst | 16 ++++++++++++---- src/python/doc/img/spiral_2d.png | Bin 0 -> 279276 bytes 2 files changed, 12 insertions(+), 4 deletions(-) create mode 100644 src/python/doc/img/spiral_2d.png diff --git a/src/python/doc/datasets.rst b/src/python/doc/datasets.rst index 62b7dca0..d2975533 100644 --- a/src/python/doc/datasets.rst +++ b/src/python/doc/datasets.rst @@ -112,13 +112,21 @@ Fetching datasets We provide some ready-to-use datasets that are not available by default when getting GUDHI, and need to be fetched explicitly. +.. autofunction:: gudhi.datasets.remote.fetch_bunny + .. figure:: ./img/bunny.png :figclass: align-center 3D Stanford bunny with 35947 vertices. -.. automodule:: gudhi.datasets.remote - :members: - :special-members: - :show-inheritance: +.. autofunction:: gudhi.datasets.remote.fetch_spiral_2d + +.. figure:: ./img/spiral_2d.png + :figclass: align-center + + 2D spiral with 114562 vertices. + +.. autofunction:: gudhi.datasets.remote.get_data_home + +.. 
autofunction:: gudhi.datasets.remote.clear_data_home diff --git a/src/python/doc/img/spiral_2d.png b/src/python/doc/img/spiral_2d.png new file mode 100644 index 00000000..abd247cd Binary files /dev/null and b/src/python/doc/img/spiral_2d.png differ -- cgit v1.2.3 From 4d2f5a1c165204765a04594a9f1f6ba9bcb939ba Mon Sep 17 00:00:00 2001 From: Hind-M Date: Tue, 24 May 2022 11:46:16 +0200 Subject: Specify in doc the use of cache when fetching datasets with wrapping functions --- src/python/gudhi/datasets/remote.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/python/gudhi/datasets/remote.py b/src/python/gudhi/datasets/remote.py index eac8caf3..d2ae2a75 100644 --- a/src/python/gudhi/datasets/remote.py +++ b/src/python/gudhi/datasets/remote.py @@ -143,6 +143,8 @@ def _get_archive_path(file_path, label): def fetch_spiral_2d(file_path = None): """ Fetch spiral_2d dataset remotely. + Note that if the dataset already exists in the target location, it is not downloaded again, + and the corresponding array is returned from cache. Parameters ---------- @@ -169,6 +171,8 @@ def fetch_bunny(file_path = None, accept_license = False): """ Fetch Stanford bunny dataset remotely and its LICENSE file. This dataset contains 35947 vertices. + Note that if the dataset already exists in the target location, it is not downloaded again, + and the corresponding array is returned from cache. 
Parameters ---------- -- cgit v1.2.3 From ce34ee3e5c28c48d605f23332cfa3c10e471a047 Mon Sep 17 00:00:00 2001 From: Hind-M Date: Tue, 24 May 2022 15:57:52 +0200 Subject: Make get_data_home function private --- src/python/doc/datasets.rst | 2 -- src/python/gudhi/datasets/remote.py | 6 +++--- src/python/test/test_remote_datasets.py | 4 ++-- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/python/doc/datasets.rst b/src/python/doc/datasets.rst index d2975533..8b0912c4 100644 --- a/src/python/doc/datasets.rst +++ b/src/python/doc/datasets.rst @@ -127,6 +127,4 @@ We provide some ready-to-use datasets that are not available by default when get 2D spiral with 114562 vertices. -.. autofunction:: gudhi.datasets.remote.get_data_home - .. autofunction:: gudhi.datasets.remote.clear_data_home diff --git a/src/python/gudhi/datasets/remote.py b/src/python/gudhi/datasets/remote.py index d2ae2a75..7e6f647f 100644 --- a/src/python/gudhi/datasets/remote.py +++ b/src/python/gudhi/datasets/remote.py @@ -16,7 +16,7 @@ import shutil import numpy as np -def get_data_home(data_home = None): +def _get_data_home(data_home = None): """ Return the path of the remote datasets directory. This folder is used to store remotely fetched datasets. @@ -55,7 +55,7 @@ def clear_data_home(data_home = None): If `None` and the 'GUDHI_DATA' environment variable does not exist, the default directory to be removed is set to "~/gudhi_data". """ - data_home = get_data_home(data_home) + data_home = _get_data_home(data_home) shutil.rmtree(data_home) def _checksum_sha256(file_path): @@ -130,7 +130,7 @@ def _get_archive_path(file_path, label): Full path of archive including filename. 
""" if file_path is None: - archive_path = join(get_data_home(), label) + archive_path = join(_get_data_home(), label) dirname = split(archive_path)[0] makedirs(dirname, exist_ok=True) else: diff --git a/src/python/test/test_remote_datasets.py b/src/python/test/test_remote_datasets.py index cde9fa22..e5d2de82 100644 --- a/src/python/test/test_remote_datasets.py +++ b/src/python/test/test_remote_datasets.py @@ -18,8 +18,8 @@ from os.path import isdir, expanduser, exists from os import remove, environ def test_data_home(): - # Test get_data_home and clear_data_home on new empty folder - empty_data_home = remote.get_data_home(data_home="empty_folder_for_test") + # Test _get_data_home and clear_data_home on new empty folder + empty_data_home = remote._get_data_home(data_home="empty_folder_for_test") assert isdir(empty_data_home) remote.clear_data_home(data_home=empty_data_home) -- cgit v1.2.3 From 899fb73b33cb6976c39a42ba26a31cf2acde63ee Mon Sep 17 00:00:00 2001 From: Hind-M Date: Wed, 25 May 2022 16:53:04 +0200 Subject: Add info in the doc concerning default data_home and 'GUDHI_DATA' env variable --- src/python/doc/datasets.rst | 3 +++ src/python/gudhi/datasets/remote.py | 13 +++++++++++++ 2 files changed, 16 insertions(+) diff --git a/src/python/doc/datasets.rst b/src/python/doc/datasets.rst index 8b0912c4..2d11a19d 100644 --- a/src/python/doc/datasets.rst +++ b/src/python/doc/datasets.rst @@ -112,6 +112,9 @@ Fetching datasets We provide some ready-to-use datasets that are not available by default when getting GUDHI, and need to be fetched explicitly. +By **default**, the fetched datasets directory is set to a folder named **'gudhi_data'** in the **user home folder**. +Alternatively, it can be set using the **'GUDHI_DATA'** environment variable. + .. autofunction:: gudhi.datasets.remote.fetch_bunny .. 
figure:: ./img/bunny.png diff --git a/src/python/gudhi/datasets/remote.py b/src/python/gudhi/datasets/remote.py index 7e6f647f..48bdcfa6 100644 --- a/src/python/gudhi/datasets/remote.py +++ b/src/python/gudhi/datasets/remote.py @@ -143,6 +143,7 @@ def _get_archive_path(file_path, label): def fetch_spiral_2d(file_path = None): """ Fetch spiral_2d dataset remotely. + Note that if the dataset already exists in the target location, it is not downloaded again, and the corresponding array is returned from cache. @@ -150,8 +151,12 @@ def fetch_spiral_2d(file_path = None): ---------- file_path : string Full path of the downloaded file including filename. + Default is None, meaning that it's set to "data_home/points/spiral_2d/spiral_2d.npy". + The "data_home" directory is set by default to "~/gudhi_data", + unless the 'GUDHI_DATA' environment variable is set. + Returns ------- points: numpy array @@ -170,7 +175,9 @@ def fetch_spiral_2d(file_path = None): def fetch_bunny(file_path = None, accept_license = False): """ Fetch Stanford bunny dataset remotely and its LICENSE file. + This dataset contains 35947 vertices. + Note that if the dataset already exists in the target location, it is not downloaded again, and the corresponding array is returned from cache. @@ -178,10 +185,16 @@ def fetch_bunny(file_path = None, accept_license = False): ---------- file_path : string Full path of the downloaded file including filename. + Default is None, meaning that it's set to "data_home/points/bunny/bunny.npy". In this case, the LICENSE file would be downloaded as "data_home/points/bunny/bunny.LICENSE". + + The "data_home" directory is set by default to "~/gudhi_data", + unless the 'GUDHI_DATA' environment variable is set. + accept_license : boolean Flag to specify if user accepts the file LICENSE and prevents from printing the corresponding license terms. + Default is False. 
Returns -- cgit v1.2.3 From 2d20991dd44c621b7becd06c086948f666de4da4 Mon Sep 17 00:00:00 2001 From: Hind-M Date: Tue, 7 Jun 2022 14:57:41 +0200 Subject: Rephrase description for fetch functions --- src/python/gudhi/datasets/remote.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/python/gudhi/datasets/remote.py b/src/python/gudhi/datasets/remote.py index 48bdcfa6..f6d3fe56 100644 --- a/src/python/gudhi/datasets/remote.py +++ b/src/python/gudhi/datasets/remote.py @@ -142,7 +142,7 @@ def _get_archive_path(file_path, label): def fetch_spiral_2d(file_path = None): """ - Fetch spiral_2d dataset remotely. + Load the spiral_2d dataset. Note that if the dataset already exists in the target location, it is not downloaded again, and the corresponding array is returned from cache. @@ -174,7 +174,7 @@ def fetch_spiral_2d(file_path = None): def fetch_bunny(file_path = None, accept_license = False): """ - Fetch Stanford bunny dataset remotely and its LICENSE file. + Load the Stanford bunny dataset. This dataset contains 35947 vertices. 
-- cgit v1.2.3 From bcc2c9584dc07d1cfcb870746110c524827d3bfa Mon Sep 17 00:00:00 2001 From: Vincent Rouvreau Date: Wed, 15 Jun 2022 09:35:49 +0200 Subject: Use boundary_opposite_vertex_simplex_range instead of boundary_simplex_range in alpha --- src/Alpha_complex/include/gudhi/Alpha_complex.h | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/src/Alpha_complex/include/gudhi/Alpha_complex.h b/src/Alpha_complex/include/gudhi/Alpha_complex.h index b1a9407b..5a0f0643 100644 --- a/src/Alpha_complex/include/gudhi/Alpha_complex.h +++ b/src/Alpha_complex/include/gudhi/Alpha_complex.h @@ -464,7 +464,8 @@ class Alpha_complex { using Vertex_handle = typename SimplicialComplexForAlpha::Vertex_handle; // ### Foreach Tau face of Sigma - for (auto f_boundary : complex.boundary_simplex_range(f_simplex)) { + for (auto face_opposite_vertex : complex.boundary_opposite_vertex_simplex_range(f_simplex)) { + auto f_boundary = face_opposite_vertex.first; #ifdef DEBUG_TRACES std::clog << " | --------------------------------------------------\n"; std::clog << " | Tau "; @@ -485,16 +486,8 @@ class Alpha_complex { #endif // DEBUG_TRACES // ### Else } else { - // Find which vertex of f_simplex is missing in f_boundary. We could actually write a variant of boundary_simplex_range that gives pairs (f_boundary, vertex). We rely on the fact that simplex_vertex_range is sorted. 
- auto longlist = complex.simplex_vertex_range(f_simplex); - auto shortlist = complex.simplex_vertex_range(f_boundary); - auto longiter = std::begin(longlist); - auto shortiter = std::begin(shortlist); - auto enditer = std::end(shortlist); - while(shortiter != enditer && *longiter == *shortiter) { ++longiter; ++shortiter; } - Vertex_handle extra = *longiter; auto const& cache=get_cache(complex, f_boundary); - bool is_gab = kernel_.is_gabriel(cache, get_point_(extra)); + bool is_gab = kernel_.is_gabriel(cache, get_point_(face_opposite_vertex.second)); #ifdef DEBUG_TRACES std::clog << " | Tau is_gabriel(Sigma)=" << is_gab << " - vertexForGabriel=" << extra << std::endl; #endif // DEBUG_TRACES -- cgit v1.2.3 From 70cc462f71703f011fd5b8ba9da668b58f09059c Mon Sep 17 00:00:00 2001 From: Vincent Rouvreau Date: Wed, 15 Jun 2022 09:41:57 +0200 Subject: Fix warning and debug traces --- src/Alpha_complex/include/gudhi/Alpha_complex.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Alpha_complex/include/gudhi/Alpha_complex.h b/src/Alpha_complex/include/gudhi/Alpha_complex.h index 5a0f0643..aec8c1b1 100644 --- a/src/Alpha_complex/include/gudhi/Alpha_complex.h +++ b/src/Alpha_complex/include/gudhi/Alpha_complex.h @@ -461,7 +461,6 @@ class Alpha_complex { void propagate_alpha_filtration(SimplicialComplexForAlpha& complex, Simplex_handle f_simplex) { // From SimplicialComplexForAlpha type required to assign filtration values. 
using Filtration_value = typename SimplicialComplexForAlpha::Filtration_value; - using Vertex_handle = typename SimplicialComplexForAlpha::Vertex_handle; // ### Foreach Tau face of Sigma for (auto face_opposite_vertex : complex.boundary_opposite_vertex_simplex_range(f_simplex)) { @@ -489,7 +488,7 @@ class Alpha_complex { auto const& cache=get_cache(complex, f_boundary); bool is_gab = kernel_.is_gabriel(cache, get_point_(face_opposite_vertex.second)); #ifdef DEBUG_TRACES - std::clog << " | Tau is_gabriel(Sigma)=" << is_gab << " - vertexForGabriel=" << extra << std::endl; + std::clog << " | Tau is_gabriel(Sigma)=" << is_gab << " - vertexForGabriel=" << face_opposite_vertex.second << std::endl; #endif // DEBUG_TRACES // ### If Tau is not Gabriel of Sigma if (false == is_gab) { -- cgit v1.2.3 From 868369dd61fb6ef475ffa3af724907927121b6bb Mon Sep 17 00:00:00 2001 From: Hind-M Date: Thu, 16 Jun 2022 15:54:21 +0200 Subject: Add exact option for exact cech variant --- .../benchmark/cech_complex_benchmark.cpp | 22 ++++++++++++++-------- src/Cech_complex/include/gudhi/Cech_complex.h | 6 ++++-- .../include/gudhi/Cech_complex_blocker.h | 21 ++++++++++++++------- 3 files changed, 32 insertions(+), 17 deletions(-) diff --git a/src/Cech_complex/benchmark/cech_complex_benchmark.cpp b/src/Cech_complex/benchmark/cech_complex_benchmark.cpp index d2a71879..19142780 100644 --- a/src/Cech_complex/benchmark/cech_complex_benchmark.cpp +++ b/src/Cech_complex/benchmark/cech_complex_benchmark.cpp @@ -31,7 +31,7 @@ using Points_off_reader = Gudhi::Points_off_reader; using Rips_complex = Gudhi::rips_complex::Rips_complex; template -Simplex_tree benchmark_cech(const std::string& off_file_points, const Filtration_value& radius, const int& dim_max) { +Simplex_tree benchmark_cech(const std::string& off_file_points, const Filtration_value& radius, const int& dim_max, const bool exact) { using Point_cgal = typename Kernel::Point_d; using Points_off_reader_cgal = Gudhi::Points_off_reader; using 
Cech_complex = Gudhi::cech_complex::Cech_complex; @@ -42,7 +42,7 @@ Simplex_tree benchmark_cech(const std::string& off_file_points, const Filtration Gudhi::Clock cech_clock("Cech computation"); Cech_complex cech_complex_from_points(off_reader_cgal.get_point_cloud(), radius); Simplex_tree cech_stree; - cech_complex_from_points.create_complex(cech_stree, dim_max); + cech_complex_from_points.create_complex(cech_stree, dim_max, exact); // ------------------------------------------ // Display information about the Cech complex @@ -56,8 +56,9 @@ int main(int argc, char* argv[]) { boost::filesystem::path full_path(boost::filesystem::current_path()); std::clog << "Current path is : " << full_path << std::endl; - std::clog << "File name ; Radius ; Rips time ; Dim-3 Epick Cech time ; Dynamic_dim Epick Cech time ; " - "Dim-3 Epeck Cech time ; Dynamic_dim Epeck Cech time ; Cech nb simplices ; Rips nb simplices;" + std::clog << "File name ; Radius ; Rips time ; Dim-3 Fast Cech time ; Dynamic_dim Fast Cech time ; " + "Dim-3 Safe Cech time ; Dynamic_dim Safe Cech time ; Dim-3 Exact Cech time ; Dynamic_dim Exact Cech time ; " + "Cech nb simplices ; Rips nb simplices;" << std::endl; boost::filesystem::directory_iterator end_itr; // default construction yields past-the-end for (boost::filesystem::directory_iterator itr(boost::filesystem::current_path()); itr != end_itr; ++itr) { @@ -83,10 +84,15 @@ int main(int argc, char* argv[]) { // -------------- // Cech complex // -------------- - benchmark_cech>>(itr->path().string(), radius, p0.size() - 1); - benchmark_cech>(itr->path().string(), radius, p0.size() - 1); - benchmark_cech>>(itr->path().string(), radius, p0.size() - 1); - auto cech_stree = benchmark_cech>(itr->path().string(), radius, p0.size() - 1); + // Fast + benchmark_cech>>(itr->path().string(), radius, p0.size() - 1, false); + benchmark_cech>(itr->path().string(), radius, p0.size() - 1, false); + // Safe + benchmark_cech>>(itr->path().string(), radius, p0.size() - 1, 
false); + benchmark_cech>(itr->path().string(), radius, p0.size() - 1, false); + // Exact + benchmark_cech>>(itr->path().string(), radius, p0.size() - 1, true); + auto cech_stree = benchmark_cech>(itr->path().string(), radius, p0.size() - 1, true); std::clog << cech_stree.num_simplices() << " ; "; std::clog << rips_stree.num_simplices() << ";" << std::endl; diff --git a/src/Cech_complex/include/gudhi/Cech_complex.h b/src/Cech_complex/include/gudhi/Cech_complex.h index fc39f75b..2c6d3df5 100644 --- a/src/Cech_complex/include/gudhi/Cech_complex.h +++ b/src/Cech_complex/include/gudhi/Cech_complex.h @@ -78,17 +78,19 @@ class Cech_complex { * * @param[in] complex SimplicialComplexForCech to be created. * @param[in] dim_max graph expansion until this given maximal dimension. + * @param[in] exact Exact filtration values computation. Not exact if `Kernel` is not CGAL::Epeck_d. * @exception std::invalid_argument In debug mode, if `complex.num_vertices()` does not return 0. * */ - void create_complex(SimplicialComplexForCechComplex& complex, int dim_max) { + void create_complex(SimplicialComplexForCechComplex& complex, int dim_max, const bool exact = false) { GUDHI_CHECK(complex.num_vertices() == 0, std::invalid_argument("Cech_complex::create_complex - simplicial complex is not empty")); // insert the proximity graph in the simplicial complex complex.insert_graph(cech_skeleton_graph_); // expand the graph until dimension dim_max - complex.expansion_with_blockers(dim_max, cech_blocker(&complex, this)); + complex.expansion_with_blockers(dim_max, cech_blocker(&complex, this, exact)); } /** @return max_radius value given at construction. 
*/ diff --git a/src/Cech_complex/include/gudhi/Cech_complex_blocker.h b/src/Cech_complex/include/gudhi/Cech_complex_blocker.h index 3141d27a..087390b6 100644 --- a/src/Cech_complex/include/gudhi/Cech_complex_blocker.h +++ b/src/Cech_complex/include/gudhi/Cech_complex_blocker.h @@ -94,9 +94,9 @@ class Cech_blocker { Point_cloud face_points; for (auto vertex : sc_ptr_->simplex_vertex_range(face)) { face_points.push_back(cc_ptr_->get_point(vertex)); - #ifdef DEBUG_TRACES - std::clog << "#(" << vertex << ")#"; - #endif // DEBUG_TRACES +#ifdef DEBUG_TRACES + std::clog << "#(" << vertex << ")#"; +#endif // DEBUG_TRACES } sph = get_sphere(face_points.cbegin(), face_points.cend()); // Put edge sphere in cache @@ -107,10 +107,13 @@ class Cech_blocker { } // Check if the minimal enclosing ball of current face contains the extra point if (kernel_.squared_distance_d_object()(sph.first, cc_ptr_->get_point(extra)) <= sph.second) { - #ifdef DEBUG_TRACES - std::clog << "center: " << sph.first << ", radius: " << radius << std::endl; - #endif // DEBUG_TRACES +#ifdef DEBUG_TRACES + std::clog << "center: " << sph.first << ", radius: " << radius << std::endl; +#endif // DEBUG_TRACES is_min_enclos_ball = true; +#if CGAL_VERSION_NR >= 1050000000 + if(exact_) CGAL::exact(sph.second); +#endif radius = std::sqrt(cast_to_fv(sph.second)); sc_ptr_->assign_key(sh, cc_ptr_->get_cache().size()); cc_ptr_->get_cache().push_back(sph); @@ -124,6 +127,9 @@ class Cech_blocker { points.push_back(cc_ptr_->get_point(vertex)); } Sphere sph = get_sphere(points.cbegin(), points.cend()); +#if CGAL_VERSION_NR >= 1050000000 + if(exact_) CGAL::exact(sph.second); +#endif radius = std::sqrt(cast_to_fv(sph.second)); sc_ptr_->assign_key(sh, cc_ptr_->get_cache().size()); @@ -138,12 +144,13 @@ class Cech_blocker { } /** \internal \brief Čech complex blocker constructor. 
*/ - Cech_blocker(SimplicialComplexForCech* sc_ptr, Cech_complex* cc_ptr) : sc_ptr_(sc_ptr), cc_ptr_(cc_ptr) {} + Cech_blocker(SimplicialComplexForCech* sc_ptr, Cech_complex* cc_ptr, const bool exact) : sc_ptr_(sc_ptr), cc_ptr_(cc_ptr), exact_(exact) {} private: SimplicialComplexForCech* sc_ptr_; Cech_complex* cc_ptr_; Kernel kernel_; + const bool exact_; }; } // namespace cech_complex -- cgit v1.2.3 From 3fa972970514333d4db22ec7628c5c1a4de3c6e8 Mon Sep 17 00:00:00 2001 From: Hind-M Date: Tue, 21 Jun 2022 15:04:27 +0200 Subject: -Add/modify some comments -Some other minor changes -Change license to LGPL --- .../benchmark/cech_complex_benchmark.cpp | 20 +++++++++++--------- src/Cech_complex/include/gudhi/Cech_complex.h | 2 +- .../include/gudhi/Cech_complex_blocker.h | 2 +- src/common/doc/main_page.md | 2 +- 4 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/Cech_complex/benchmark/cech_complex_benchmark.cpp b/src/Cech_complex/benchmark/cech_complex_benchmark.cpp index 19142780..a9dc5d0d 100644 --- a/src/Cech_complex/benchmark/cech_complex_benchmark.cpp +++ b/src/Cech_complex/benchmark/cech_complex_benchmark.cpp @@ -61,20 +61,22 @@ int main(int argc, char* argv[]) { "Cech nb simplices ; Rips nb simplices;" << std::endl; boost::filesystem::directory_iterator end_itr; // default construction yields past-the-end + // For every ".off" file in the current directory, and for 3 predefined thresholds, compare Rips and various Cech constructions for (boost::filesystem::directory_iterator itr(boost::filesystem::current_path()); itr != end_itr; ++itr) { if (!boost::filesystem::is_directory(itr->status())) { if (itr->path().extension() == ".off") { Points_off_reader off_reader(itr->path().string()); Point p0 = off_reader.get_point_cloud()[0]; - - for (Filtration_value radius = 0.1; radius < 0.4; radius += 0.1) { + // Loop over the different thresholds + for (Filtration_value radius = 0.1; radius < 0.35; radius += 0.1) { std::clog << itr->path().stem() << " ; "; 
std::clog << radius << " ; "; Gudhi::Clock rips_clock("Rips computation"); Rips_complex rips_complex_from_points(off_reader.get_point_cloud(), radius, Gudhi::Euclidean_distance()); Simplex_tree rips_stree; - rips_complex_from_points.create_complex(rips_stree, p0.size() - 1); + int dim_max = p0.size() - 1; + rips_complex_from_points.create_complex(rips_stree, dim_max); // ------------------------------------------ // Display information about the Rips complex // ------------------------------------------ @@ -85,14 +87,14 @@ int main(int argc, char* argv[]) { // Cech complex // -------------- // Fast - benchmark_cech>>(itr->path().string(), radius, p0.size() - 1, false); - benchmark_cech>(itr->path().string(), radius, p0.size() - 1, false); + benchmark_cech>>(itr->path().string(), radius, dim_max, false); + benchmark_cech>(itr->path().string(), radius, dim_max, false); // Safe - benchmark_cech>>(itr->path().string(), radius, p0.size() - 1, false); - benchmark_cech>(itr->path().string(), radius, p0.size() - 1, false); + benchmark_cech>>(itr->path().string(), radius, dim_max, false); + benchmark_cech>(itr->path().string(), radius, dim_max, false); // Exact - benchmark_cech>>(itr->path().string(), radius, p0.size() - 1, true); - auto cech_stree = benchmark_cech>(itr->path().string(), radius, p0.size() - 1, true); + benchmark_cech>>(itr->path().string(), radius, dim_max, true); + auto cech_stree = benchmark_cech>(itr->path().string(), radius, dim_max, true); std::clog << cech_stree.num_simplices() << " ; "; std::clog << rips_stree.num_simplices() << ";" << std::endl; diff --git a/src/Cech_complex/include/gudhi/Cech_complex.h b/src/Cech_complex/include/gudhi/Cech_complex.h index 2c6d3df5..bae21d28 100644 --- a/src/Cech_complex/include/gudhi/Cech_complex.h +++ b/src/Cech_complex/include/gudhi/Cech_complex.h @@ -30,7 +30,7 @@ namespace cech_complex { * \ingroup cech_complex * * \details - * Cech complex is a simplicial complex constructed from a proximity graph, where the 
set of all simplices is filtered + * Cech complex is a simplicial complex where the set of all simplices is filtered * by the radius of their minimal enclosing ball and bounded by the given max_radius. * * \tparam Kernel CGAL kernel: either Epick_d or Epeck_d. diff --git a/src/Cech_complex/include/gudhi/Cech_complex_blocker.h b/src/Cech_complex/include/gudhi/Cech_complex_blocker.h index 087390b6..9cd49a52 100644 --- a/src/Cech_complex/include/gudhi/Cech_complex_blocker.h +++ b/src/Cech_complex/include/gudhi/Cech_complex_blocker.h @@ -133,7 +133,7 @@ class Cech_blocker { radius = std::sqrt(cast_to_fv(sph.second)); sc_ptr_->assign_key(sh, cc_ptr_->get_cache().size()); - cc_ptr_->get_cache().push_back(sph); + cc_ptr_->get_cache().push_back(std::move(sph)); } #ifdef DEBUG_TRACES diff --git a/src/common/doc/main_page.md b/src/common/doc/main_page.md index 2cb02e3f..ce903405 100644 --- a/src/common/doc/main_page.md +++ b/src/common/doc/main_page.md @@ -180,7 +180,7 @@ Author: Vincent Rouvreau
Introduced in: GUDHI 2.2.0
- Copyright: MIT [(GPL v3)](../../licensing/)
+ Copyright: MIT [(LGPL v3)](../../licensing/)
Requires: \ref cgal -- cgit v1.2.3 From b829a198e16fbef4c0cb2698b2c723fa353aac55 Mon Sep 17 00:00:00 2001 From: Hind-M Date: Fri, 24 Jun 2022 11:03:22 +0200 Subject: Use CGAL::NT_converter instead of CGAL::to_double in Sphere_circumradius --- src/Cech_complex/include/gudhi/Cech_complex.h | 2 +- src/Cech_complex/include/gudhi/Cech_complex_blocker.h | 1 + src/Cech_complex/include/gudhi/Sphere_circumradius.h | 15 +++++++++------ src/Cech_complex/test/test_cech_complex.cpp | 4 ++-- 4 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/Cech_complex/include/gudhi/Cech_complex.h b/src/Cech_complex/include/gudhi/Cech_complex.h index bae21d28..08b7a72f 100644 --- a/src/Cech_complex/include/gudhi/Cech_complex.h +++ b/src/Cech_complex/include/gudhi/Cech_complex.h @@ -70,7 +70,7 @@ class Cech_complex { point_cloud_.assign(std::begin(points), std::end(points)); cech_skeleton_graph_ = Gudhi::compute_proximity_graph<SimplicialComplexForCechComplex>( - point_cloud_, max_radius_, Sphere_circumradius<Kernel>()); + point_cloud_, max_radius_, Sphere_circumradius<Kernel, Filtration_value>()); } /** \brief Initializes the simplicial complex from the proximity graph and expands it until a given maximal diff --git a/src/Cech_complex/include/gudhi/Cech_complex_blocker.h b/src/Cech_complex/include/gudhi/Cech_complex_blocker.h index 9cd49a52..25d9a71f 100644 --- a/src/Cech_complex/include/gudhi/Cech_complex_blocker.h +++ b/src/Cech_complex/include/gudhi/Cech_complex_blocker.h @@ -12,6 +12,7 @@ #define CECH_COMPLEX_BLOCKER_H_ #include <CGAL/NT_converter.h> // for casting from FT to Filtration_value +#include <CGAL/Lazy_exact_nt.h> // for CGAL::exact #include <iostream> #include <vector> diff --git a/src/Cech_complex/include/gudhi/Sphere_circumradius.h b/src/Cech_complex/include/gudhi/Sphere_circumradius.h index b0d9f7cc..790f6950 100644 --- a/src/Cech_complex/include/gudhi/Sphere_circumradius.h +++ b/src/Cech_complex/include/gudhi/Sphere_circumradius.h @@ -11,7 +11,7 @@ #ifndef SPHERE_CIRCUMRADIUS_H_ #define SPHERE_CIRCUMRADIUS_H_ -#include <CGAL/Epick_d.h> // for #include <CGAL/NT_converter.h> +#include <CGAL/Epick_d.h> // for #include <CGAL/NT_converter.h> which is not
working/compiling alone #include <cmath> // for std::sqrt #include <vector> @@ -22,14 +22,17 @@ namespace cech_complex { /** \private @brief Compute the circumradius of the sphere passing through points given by a range of coordinates. * The points are assumed to have the same dimension. */ -template<typename Kernel> +template<typename Kernel, typename Filtration_value> class Sphere_circumradius { private: Kernel kernel_; public: + using FT = typename Kernel::FT; using Point = typename Kernel::Point_d; using Point_cloud = typename std::vector<Point>; + CGAL::NT_converter<FT, Filtration_value> cast_to_fv; + /** \brief Circumradius of sphere passing through two points using CGAL. * * @param[in] point_1 @@ -38,8 +41,8 @@ class Sphere_circumradius { * \tparam Point must be a Kernel::Point_d from CGAL. * */ - double operator()(const Point& point_1, const Point& point_2) const { - return std::sqrt(CGAL::to_double(kernel_.squared_distance_d_object()(point_1, point_2))) / 2.; + Filtration_value operator()(const Point& point_1, const Point& point_2) const { + return std::sqrt(cast_to_fv(kernel_.squared_distance_d_object()(point_1, point_2))) / 2.; } /** \brief Circumradius of sphere passing through point cloud using CGAL. @@ -49,8 +52,8 @@ class Sphere_circumradius { * \tparam Point_cloud must be a range of Kernel::Point_d points from CGAL.
* */ - double operator()(const Point_cloud& point_cloud) const { - return std::sqrt(CGAL::to_double(kernel_.compute_squared_radius_d_object()(point_cloud.begin(), point_cloud.end()))); + Filtration_value operator()(const Point_cloud& point_cloud) const { + return std::sqrt(cast_to_fv(kernel_.compute_squared_radius_d_object()(point_cloud.begin(), point_cloud.end()))); } }; diff --git a/src/Cech_complex/test/test_cech_complex.cpp b/src/Cech_complex/test/test_cech_complex.cpp index ea32f596..f5980e6d 100644 --- a/src/Cech_complex/test/test_cech_complex.cpp +++ b/src/Cech_complex/test/test_cech_complex.cpp @@ -107,11 +107,11 @@ BOOST_AUTO_TEST_CASE(Cech_complex_for_documentation) { std::clog << vertex << ","; vp.push_back(points.at(vertex)); } - std::clog << ") - distance =" << Gudhi::cech_complex::Sphere_circumradius<Kernel>()(vp.at(0), vp.at(1)) + std::clog << ") - distance =" << Gudhi::cech_complex::Sphere_circumradius<Kernel, Filtration_value>()(vp.at(0), vp.at(1)) << " - filtration =" << st.filtration(f_simplex) << std::endl; BOOST_CHECK(vp.size() == 2); GUDHI_TEST_FLOAT_EQUALITY_CHECK(st.filtration(f_simplex), - Gudhi::cech_complex::Sphere_circumradius<Kernel>()(vp.at(0), vp.at(1))); + Gudhi::cech_complex::Sphere_circumradius<Kernel, Filtration_value>()(vp.at(0), vp.at(1))); } } -- cgit v1.2.3