summaryrefslogtreecommitdiff
path: root/src/python/test/test_remote_datasets.py
blob: e777abc658594c04a32f50e60cdd3b556ac6ce95 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
# Author(s):       Hind Montassif
#
# Copyright (C) 2021 Inria
#
# Modification(s):
#   - YYYY/MM Author: Description of the modification


import contextlib
import io
import os.path
import re
import sys

import pytest

from gudhi.datasets import remote

def _check_dir_file_names(path_file_dw, filename, dirname):
    """Assert that a downloaded file exists and sits where it is expected.

    The path is cut into components on spaces, slashes and backslashes, then
    compared with ``dirname`` (directory components) and ``filename`` (last
    component).

    :param path_file_dw: path of the downloaded file, as a string.
    :param filename: expected name of the file (last path component).
    :param dirname: expected directory; may itself contain several components,
        e.g. "remote_datasets/bunny".
    :raises AssertionError: if the file does not exist or a component differs.
    """
    assert os.path.isfile(path_file_dw)

    separators = re.compile(r' |/|\\')
    path_parts = separators.split(path_file_dw)

    if len(path_parts) < 3:
        # Flat layout: "<dirname>/<filename>", dirname is compared as a whole
        assert dirname == path_parts[0]
        assert filename == path_parts[1]
        return

    # Case where inner directories are created in "remote_datasets/"; e.g: "remote_datasets/bunny"
    # Every component but the last one is matched, position by position, against dirname.
    dir_parts = separators.split(dirname)
    for position, component in enumerate(path_parts[:-1]):
        assert dir_parts[position] == component
    assert filename == path_parts[-1]

def _check_fetch_output(url, filename, dirname = "remote_datasets", file_checksum = None):
    """Call ``remote.fetch`` and check the location of the file it reports.

    The four arguments are forwarded unchanged to ``remote.fetch``; its return
    value is treated as the path of the downloaded file and validated with
    ``_check_dir_file_names``. Any exception raised by ``remote.fetch``
    propagates to the caller.

    :param url: address the file is fetched from.
    :param filename: name expected for the downloaded file.
    :param dirname: directory expected to contain the downloaded file.
    :param file_checksum: checksum handed to ``remote.fetch`` (``None`` by default).
    """
    downloaded_path = remote.fetch(url, filename, dirname, file_checksum)
    _check_dir_file_names(downloaded_path, filename, dirname)

def _get_bunny_license_print(accept_license = False):
    """Fetch ``bunny.off`` and capture what is written to stdout meanwhile.

    :param accept_license: forwarded to ``remote.fetch`` as its last argument.
    :returns: the ``io.StringIO`` buffer that received everything written to
        ``sys.stdout`` during the fetch; read it with ``getvalue()``.
    """
    captured_output = io.StringIO()
    # redirect_stdout puts back whatever sys.stdout was before the block, and
    # does so even when fetch raises. Assigning sys.__stdout__ by hand would
    # instead discard any replacement stream already installed (for instance
    # by the test runner) and would leave stdout redirected after an error.
    with contextlib.redirect_stdout(captured_output):
        remote.fetch("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points//bunny/bunny.off", "bunny.off", "remote_datasets/bunny",
                     '11852d5e73e2d4bd7b86a2c5cc8a5884d0fbb72539493e8cec100ea922b19f5b', accept_license)
    return captured_output

def test_fetch_remote_datasets():
    """Exercise ``remote.fetch`` and its wrappers against the gudhi-data files.

    Steps, in order: a fetch with a wrong checksum (expected to raise
    ``OSError``), fetches with the expected checksums, fetches without any
    checksum, the ``fetch_spiral_2d`` wrapper, the printing of the bunny
    LICENSE depending on ``accept_license``, and the ``fetch_bunny`` wrapper.
    """
    spiral_url = "https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/spiral_2d.csv"
    spiral_name = "spiral_2d.csv"
    sphere_url = "https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/sphere3D_pts_on_grid.off"
    sphere_name = "sphere3D_pts_on_grid.off"

    # Test fetch with a wrong checksum
    with pytest.raises(OSError):
        _check_fetch_output(spiral_url, spiral_name, file_checksum = 'XXXXXXXXXX')

    datasets = (
        (spiral_url, spiral_name, '37530355d980d957c4ec06b18c775f90a91e446107d06c6201c9b4000b077f38'),
        (sphere_url, sphere_name, '32f96d2cafb1177f0dd5e0a019b6ff5658e14a619a7815ae55ad0fc5e8bd3f88'),
    )

    # Test files download from given urls with checksums provided
    for dataset_url, dataset_name, dataset_checksum in datasets:
        _check_fetch_output(dataset_url, dataset_name, file_checksum = dataset_checksum)

    # Test files download from given urls without checksums
    for dataset_url, dataset_name, _ in datasets:
        _check_fetch_output(dataset_url, dataset_name)

    # Test spiral_2d.csv wrapping function
    spiral_path = remote.fetch_spiral_2d()
    _check_dir_file_names(spiral_path, 'spiral_2d.csv', 'remote_datasets')

    # Test printing existing LICENSE file when fetching bunny.off with accept_license = False (default)
    # Fetch LICENSE file
    remote.fetch("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points//bunny/LICENSE", "LICENSE", "remote_datasets/bunny",
                 'aeb1bad319b7d74fa0b8076358182f9c6b1284c67cc07dc67cbc9bc73025d956')
    with open("remote_datasets/bunny/LICENSE") as license_file:
        license_text = license_file.read()
        # The captured output is compared without its trailing newline(s)
        printed_text = _get_bunny_license_print().getvalue().rstrip("\n")
        assert license_text == printed_text

    # Test not printing bunny.off LICENSE when accept_license = True
    silent_output = _get_bunny_license_print(accept_license = True).getvalue()
    assert "" == silent_output

    # Test fetch_bunny wrapping function
    # The result is indexed: LICENSE path first, bunny.off path second
    bunny_paths = remote.fetch_bunny()
    for index, expected_name in enumerate(('LICENSE', 'bunny.off')):
        _check_dir_file_names(bunny_paths[index], expected_name, 'remote_datasets/bunny')