1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
|
# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
# Author(s): Hind Montassif
#
# Copyright (C) 2021 Inria
#
# Modification(s):
# - YYYY/MM Author: Description of the modification
from gudhi.datasets import remote
import re
import shutil
import io
import sys
import pytest
from os.path import isfile, isdir, expanduser
from os import makedirs
def _check_dir_file_names(path_file_dw, filename, dirname):
    """Assert that a downloaded file exists and is located where it was requested.

    Both ``path_file_dw`` and ``dirname`` are split on spaces, forward slashes and
    backslashes, and the resulting components are compared.

    Parameters:
        path_file_dw: Path of the downloaded file; must point to an existing file.
        filename: Expected last component of ``path_file_dw``.
        dirname: Directory the file was requested in; it may contain inner
            directories, e.g. "test_gudhi_data/bunny".

    Raises:
        AssertionError: If the file does not exist, or if the directory or file
            components of ``path_file_dw`` do not match ``dirname`` / ``filename``.
    """
    assert isfile(path_file_dw)

    separators = r' |/|\\'
    names_dw = re.split(separators, path_file_dw)
    # Case where inner directories are created in "test_gudhi_data/"; e.g: "test_gudhi_data/bunny"
    if len(names_dw) >= 3:
        # dirname is split once (it is loop-invariant) and compared by slice: a
        # dirname with fewer components than the downloaded path now fails the
        # assertion instead of raising IndexError. Only the leading components
        # are compared, so extra trailing components of dirname (such as the
        # empty one produced by a trailing separator) are still tolerated.
        dir_names = re.split(separators, dirname)
        assert dir_names[:len(names_dw) - 1] == names_dw[:-1]
        assert filename == names_dw[-1]
    else:
        assert dirname == names_dw[0]
        # Sliced so that a path without any directory component fails the
        # assertion rather than raising IndexError.
        assert names_dw[1:] == [filename]
def _check_fetch_output(url, filename, dirname="test_gudhi_data", file_checksum=None):
    """Fetch ``url`` into ``dirname`` and check where the file was written.

    ``dirname`` is created first if it does not exist yet. The path returned by
    ``remote._fetch_remote`` is then validated with ``_check_dir_file_names``.

    Parameters:
        url: Address of the file to fetch.
        filename: Name passed to ``remote._fetch_remote`` for the fetched file.
        dirname: Target directory (default: "test_gudhi_data").
        file_checksum: Checksum forwarded to ``remote._fetch_remote``; ``None``
            by default.
    """
    makedirs(dirname, exist_ok=True)
    downloaded_path = remote._fetch_remote(url, filename, dirname, file_checksum)
    _check_dir_file_names(downloaded_path, filename, dirname)
def _get_bunny_license_print(accept_license = False):
    """Fetch bunny.npy and return what was written to stdout during the fetch.

    Parameters:
        accept_license: Forwarded to ``remote._fetch_remote`` as its fifth
            positional argument (default: False).

    Returns:
        The ``io.StringIO`` buffer that received stdout while the fetch ran.
    """
    # Imported locally so that this helper is self-contained.
    from contextlib import redirect_stdout

    captured_output = io.StringIO()
    makedirs("test_gudhi_data/bunny", exist_ok=True)
    # redirect_stdout puts back whatever sys.stdout was before, even when the
    # fetch raises. Assigning sys.__stdout__ by hand would leave stdout
    # redirected on failure and would discard any replacement stream that was
    # already installed (for instance by the test runner).
    with redirect_stdout(captured_output):
        remote._fetch_remote("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/bunny/bunny.npy", "bunny.npy", "test_gudhi_data/bunny",
                             '13f7842ebb4b45370e50641ff28c88685703efa5faab14edf0bb7d113a965e1b', accept_license)
    return captured_output
def test_fetch_remote_datasets():
    """Exercise ``remote._fetch_remote`` with wrong, correct and missing checksums,
    then check the LICENSE printing behaviour when fetching bunny.npy.

    Everything is downloaded into "test_gudhi_data", which is removed at the end
    whether the test passes or fails.
    """
    spiral_url = "https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/spiral_2d/spiral_2d.npy"
    sphere_url = "https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/sphere3D_pts_on_grid.off"
    try:
        # Test fetch with a wrong checksum
        with pytest.raises(OSError):
            _check_fetch_output(spiral_url, "spiral_2d.npy", file_checksum = 'XXXXXXXXXX')

        # Test files download from given urls with checksums provided
        _check_fetch_output(spiral_url, "spiral_2d.npy",
                            file_checksum = '88312ffd6df2e2cb2bde9c0e1f962d7d644c6f58dc369c7b377b298dacdc4eaf')
        _check_fetch_output(sphere_url, "sphere3D_pts_on_grid.off",
                            file_checksum = '32f96d2cafb1177f0dd5e0a019b6ff5658e14a619a7815ae55ad0fc5e8bd3f88')

        # Test files download from given urls without checksums
        _check_fetch_output(spiral_url, "spiral_2d.npy")
        _check_fetch_output(sphere_url, "sphere3D_pts_on_grid.off")

        # Test printing existing LICENSE file when fetching bunny.npy with accept_license = False (default)
        # Fetch LICENSE file
        makedirs("test_gudhi_data/bunny", exist_ok=True)
        remote._fetch_remote("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/bunny/LICENSE", "LICENSE", "test_gudhi_data/bunny",
                             'b763dbe1b2fc6015d05cbf7bcc686412a2eb100a1f2220296e3b4a644c69633a')
        with open("test_gudhi_data/bunny/LICENSE") as f:
            assert f.read().rstrip("\n") == _get_bunny_license_print().getvalue().rstrip("\n")

        # Test not printing bunny.npy LICENSE when accept_license = True
        assert "" == _get_bunny_license_print(accept_license = True).getvalue()
    finally:
        # Remove "test_gudhi_data" directory and all its content, also when a
        # fetch or an assertion above failed, so no downloaded data is left behind.
        if isdir("test_gudhi_data"):
            shutil.rmtree("test_gudhi_data")
def test_fetch_remote_datasets_wrapped():
    """Exercise the ``fetch_spiral_2d`` and ``fetch_bunny`` wrappers with the
    default data directory and with an explicit one.

    The explicit directory is always removed at the end; the default
    "~/gudhi_data" directory is removed only if it did not exist before the
    test started. Cleanup runs whether the test passes or fails.
    """
    default_dir = expanduser("~/gudhi_data")
    other_dir = "./another_fetch_folder_for_test"
    # Check if gudhi_data default dir exists already
    to_be_removed = not isdir(default_dir)
    # Bound up front so the cleanup below can always delete these names.
    spiral_2d_arr = bunny_arr = None
    try:
        # Test fetch_spiral_2d and fetch_bunny wrapping functions (twice, to test case of already fetched files)
        for _ in range(2):
            spiral_2d_arr = remote.fetch_spiral_2d()
            assert spiral_2d_arr.shape == (114562, 2)
            bunny_arr = remote.fetch_bunny()
            assert bunny_arr.shape == (35947, 3)

        # Check that default dir was created
        assert isdir(default_dir)

        # Test fetch_spiral_2d and fetch_bunny wrapping functions with data directory different from default
        spiral_2d_arr = remote.fetch_spiral_2d(dirname = other_dir)
        assert spiral_2d_arr.shape == (114562, 2)
        bunny_arr = remote.fetch_bunny(dirname = other_dir)
        assert bunny_arr.shape == (35947, 3)
        assert isdir(other_dir)
    finally:
        # Remove test folders, also when a fetch or an assertion above failed.
        # The arrays are dropped first, as in the original cleanup order
        # (presumably so nothing still refers to the fetched data - TODO confirm).
        del spiral_2d_arr
        del bunny_arr
        if to_be_removed and isdir(default_dir):
            shutil.rmtree(default_dir)
        if isdir(other_dir):
            shutil.rmtree(other_dir)
def test_data_home():
    """Check ``get_data_home`` / ``clear_data_home`` on a fresh folder.

    The directory returned by ``remote.get_data_home`` must exist, and must be
    gone again once ``remote.clear_data_home`` has been called on it.
    """
    data_home_path = remote.get_data_home(data_home="empty_folder_for_test")
    # The folder is expected to exist right after get_data_home ...
    assert isdir(data_home_path)

    # ... and to be gone once it has been cleared.
    remote.clear_data_home(data_home=data_home_path)
    assert not isdir(data_home_path)
|