From 06ff6fac211d2823c7d14a6d2f4a4db03f48d2e3 Mon Sep 17 00:00:00 2001 From: vrouvrea Date: Fri, 26 Jan 2018 14:01:39 +0000 Subject: Seperate installation and examples from main page Move cover complex utilities from examples GIC.cpp example was not compiled, nor tested. It is removed. Persistence representation : no need to link with Boost_SYSTEM git-svn-id: svn+ssh://scm.gforge.inria.fr/svnroot/gudhi/trunk@3164 636b058d-ea47-450e-bf9e-a15bfbe3eedb Former-commit-id: cf2bfa6c6de2ed359aaa165b9f80bca7e06defb1 --- src/Nerve_GIC/doc/Intro_graph_induced_complex.h | 29 -- src/Nerve_GIC/example/CMakeLists.txt | 12 - src/Nerve_GIC/example/GIC.cpp | 95 ----- .../example/KeplerMapperVisuFromTxtFile.py | 72 ---- src/Nerve_GIC/example/Nerve.cpp | 96 ----- src/Nerve_GIC/example/Nerve.txt | 63 ---- src/Nerve_GIC/example/VoronoiGIC.cpp | 90 ----- src/Nerve_GIC/example/km.py | 390 --------------------- src/Nerve_GIC/example/km.py.COPYRIGHT | 26 -- src/Nerve_GIC/utilities/CMakeLists.txt | 22 ++ .../utilities/KeplerMapperVisuFromTxtFile.py | 72 ++++ src/Nerve_GIC/utilities/Nerve.cpp | 96 +++++ src/Nerve_GIC/utilities/Nerve.txt | 63 ++++ src/Nerve_GIC/utilities/VoronoiGIC.cpp | 90 +++++ src/Nerve_GIC/utilities/km.py | 390 +++++++++++++++++++++ src/Nerve_GIC/utilities/km.py.COPYRIGHT | 26 ++ 16 files changed, 759 insertions(+), 873 deletions(-) delete mode 100644 src/Nerve_GIC/example/GIC.cpp delete mode 100755 src/Nerve_GIC/example/KeplerMapperVisuFromTxtFile.py delete mode 100644 src/Nerve_GIC/example/Nerve.cpp delete mode 100644 src/Nerve_GIC/example/Nerve.txt delete mode 100644 src/Nerve_GIC/example/VoronoiGIC.cpp delete mode 100755 src/Nerve_GIC/example/km.py delete mode 100644 src/Nerve_GIC/example/km.py.COPYRIGHT create mode 100644 src/Nerve_GIC/utilities/CMakeLists.txt create mode 100755 src/Nerve_GIC/utilities/KeplerMapperVisuFromTxtFile.py create mode 100644 src/Nerve_GIC/utilities/Nerve.cpp create mode 100644 src/Nerve_GIC/utilities/Nerve.txt create mode 100644 src/Nerve_GIC/utilities/VoronoiGIC.cpp create mode 100755 src/Nerve_GIC/utilities/km.py create mode 100644 src/Nerve_GIC/utilities/km.py.COPYRIGHT (limited to 'src/Nerve_GIC') diff --git a/src/Nerve_GIC/doc/Intro_graph_induced_complex.h b/src/Nerve_GIC/doc/Intro_graph_induced_complex.h index 7578cc53..344cb031 100644 --- a/src/Nerve_GIC/doc/Intro_graph_induced_complex.h +++ b/src/Nerve_GIC/doc/Intro_graph_induced_complex.h @@ -177,7 +177,6 @@ namespace cover_complex { * \image html "funcGICvisu.jpg" "Visualization with neato" * * \copyright GNU General Public License v3. - * \verbatim Contact: gudhi-users@lists.gforge.inria.fr \endverbatim */ /** @} */ // end defgroup cover_complex @@ -186,31 +185,3 @@ namespace cover_complex { } // namespace Gudhi #endif // DOC_COVER_COMPLEX_INTRO_COVER_COMPLEX_H_ - - -/* * \subsection gicexample Example with cover from function - * - * This example builds the GIC of a point cloud sampled on a 3D human shape (human.off). - * The cover C comes from the preimages of intervals (with length 0.075 and gain 0) - * covering the height function (coordinate 2), - * and the graph G comes from a Rips complex built with threshold 0.075. - * Note that if the gain is too big, the number of cliques increases a lot, - * which make the computation time much larger. - * - * \include Nerve_GIC/GIC.cpp - * - * When launching: - * - * \code $> ./GIC ../../data/points/human.off 0.075 2 0.075 0 --v - * \endcode - * - * the program outputs SC.txt, which can be visualized with python and firefox as before: - * - * \image html "gicvisu.jpg" "Visualization with KeplerMapper" - * */ - - -/* * Using e.g. - * - * \code $> python KeplerMapperVisuFromTxtFile.py && firefox SC.html - * \endcode */ diff --git a/src/Nerve_GIC/example/CMakeLists.txt b/src/Nerve_GIC/example/CMakeLists.txt index 73728dc0..434637fa 100644 --- a/src/Nerve_GIC/example/CMakeLists.txt +++ b/src/Nerve_GIC/example/CMakeLists.txt @@ -3,26 +3,14 @@ project(Nerve_GIC_examples) if (NOT CGAL_VERSION VERSION_LESS 4.8.1) - add_executable ( Nerve Nerve.cpp ) add_executable ( CoordGIC CoordGIC.cpp ) add_executable ( FuncGIC FuncGIC.cpp ) - add_executable ( VoronoiGIC VoronoiGIC.cpp ) if (TBB_FOUND) - target_link_libraries(Nerve ${TBB_LIBRARIES}) target_link_libraries(CoordGIC ${TBB_LIBRARIES}) target_link_libraries(FuncGIC ${TBB_LIBRARIES}) - target_link_libraries(VoronoiGIC ${TBB_LIBRARIES}) endif() - file(COPY KeplerMapperVisuFromTxtFile.py km.py DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/) - - add_test(NAME Nerve_GIC_example_nerve COMMAND $ - "${CMAKE_SOURCE_DIR}/data/points/human.off" "2" "10" "0.3") - - add_test(NAME Nerve_GIC_example_VoronoiGIC COMMAND $ - "${CMAKE_SOURCE_DIR}/data/points/human.off" "100") - add_test(NAME Nerve_GIC_example_CoordGIC COMMAND $ "${CMAKE_SOURCE_DIR}/data/points/tore3D_1307.off" "0") diff --git a/src/Nerve_GIC/example/GIC.cpp b/src/Nerve_GIC/example/GIC.cpp deleted file mode 100644 index 2bc24a4d..00000000 --- a/src/Nerve_GIC/example/GIC.cpp +++ /dev/null @@ -1,95 +0,0 @@ -/* This file is part of the Gudhi Library. The Gudhi library - * (Geometric Understanding in Higher Dimensions) is a generic C++ - * library for computational topology. - * - * Author(s): Mathieu Carrière - * - * Copyright (C) 2017 INRIA - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include - -#include -#include - -void usage(int nbArgs, char *const progName) { - std::cerr << "Error: Number of arguments (" << nbArgs << ") is not correct\n"; - std::cerr << "Usage: " << progName << " filename.off threshold coordinate resolution gain [--v] \n"; - std::cerr << " i.e.: " << progName << " ../../data/points/human.off 0.075 2 0.075 0 --v \n"; - exit(-1); // ----- >> -} - -int main(int argc, char **argv) { - if ((argc != 6) && (argc != 7)) usage(argc, argv[0]); - - using Point = std::vector; - - std::string off_file_name(argv[1]); - double threshold = atof(argv[2]); - int coord = atoi(argv[3]); - double resolution = atof(argv[4]); - double gain = atof(argv[5]); - bool verb = 0; - if (argc == 7) verb = 1; - - // ---------------------------------------------------------------------------- - // Init of a graph induced complex from an OFF file - // ---------------------------------------------------------------------------- - - Gudhi::graph_induced_complex::Graph_induced_complex GIC; - GIC.set_verbose(verb); - - bool check = GIC.read_point_cloud(off_file_name); - - if (!check) { - std::cout << "Incorrect OFF file." << std::endl; - } else { - GIC.set_color_from_coordinate(coord); - GIC.set_function_from_coordinate(coord); - - GIC.set_graph_from_rips(threshold, Gudhi::Euclidean_distance()); - - GIC.set_resolution_with_interval_length(resolution); - GIC.set_gain(gain); - GIC.set_cover_from_function(); - - GIC.find_GIC_simplices(); - - GIC.plot_TXT_for_KeplerMapper(); - - Gudhi::Simplex_tree<> stree; - GIC.create_complex(stree); - - // ---------------------------------------------------------------------------- - // Display information about the graph induced complex - // ---------------------------------------------------------------------------- - - if (verb) { - std::cout << "Graph induced complex is of dimension " << stree.dimension() << " - " << stree.num_simplices() - << " simplices - " << stree.num_vertices() << " vertices." << std::endl; - - std::cout << "Iterator on graph induced complex simplices" << std::endl; - for (auto f_simplex : stree.filtration_simplex_range()) { - for (auto vertex : stree.simplex_vertex_range(f_simplex)) { - std::cout << vertex << " "; - } - std::cout << std::endl; - } - } - } - - return 0; -} diff --git a/src/Nerve_GIC/example/KeplerMapperVisuFromTxtFile.py b/src/Nerve_GIC/example/KeplerMapperVisuFromTxtFile.py deleted file mode 100755 index d2897774..00000000 --- a/src/Nerve_GIC/example/KeplerMapperVisuFromTxtFile.py +++ /dev/null @@ -1,72 +0,0 @@ -#!/usr/bin/env python - -import km -import numpy as np -from collections import defaultdict - -"""This file is part of the Gudhi Library. The Gudhi library - (Geometric Understanding in Higher Dimensions) is a generic C++ - library for computational topology. - - Author(s): Mathieu Carriere - - Copyright (C) 2017 INRIA - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -""" - -__author__ = "Mathieu Carriere" -__copyright__ = "Copyright (C) 2017 INRIA" -__license__ = "GPL v3" - -network = {} -mapper = km.KeplerMapper(verbose=0) -data = np.zeros((3,3)) -projected_data = mapper.fit_transform( data, projection="sum", scaler=None ) - -f = open('SC.txt','r') -nodes = defaultdict(list) -links = defaultdict(list) -custom = defaultdict(list) - -dat = f.readline() -lens = f.readline() -color = f.readline(); -param = [float(i) for i in f.readline().split(" ")] - -nums = [int(i) for i in f.readline().split(" ")] -num_nodes = nums[0] -num_edges = nums[1] - -for i in range(0,num_nodes): - point = [float(j) for j in f.readline().split(" ")] - nodes[ str(int(point[0])) ] = [ int(point[0]), point[1], int(point[2]) ] - links[ str(int(point[0])) ] = [] - custom[ int(point[0]) ] = point[1] - -m = min([custom[i] for i in range(0,num_nodes)]) -M = max([custom[i] for i in range(0,num_nodes)]) - -for i in range(0,num_edges): - edge = [int(j) for j in f.readline().split(" ")] - links[ str(edge[0]) ].append( str(edge[1]) ) - links[ str(edge[1]) ].append( str(edge[0]) ) - -network["nodes"] = nodes -network["links"] = links -network["meta"] = lens - -mapper.visualize(network, color_function = color, path_html="SC.html", title=dat, -graph_link_distance=30, graph_gravity=0.1, graph_charge=-120, custom_tooltips=custom, width_html=0, -height_html=0, show_tooltips=True, show_title=True, show_meta=True, res=param[0],gain=param[1], minimum=m,maximum=M) diff --git a/src/Nerve_GIC/example/Nerve.cpp b/src/Nerve_GIC/example/Nerve.cpp deleted file mode 100644 index 6abdedc7..00000000 --- a/src/Nerve_GIC/example/Nerve.cpp +++ /dev/null @@ -1,96 +0,0 @@ -/* This file is part of the Gudhi Library. The Gudhi library - * (Geometric Understanding in Higher Dimensions) is a generic C++ - * library for computational topology. - * - * Author(s): Mathieu Carrière - * - * Copyright (C) 2017 INRIA - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include - -#include -#include - -void usage(int nbArgs, char *const progName) { - std::cerr << "Error: Number of arguments (" << nbArgs << ") is not correct\n"; - std::cerr << "Usage: " << progName << " filename.off coordinate resolution gain [--v] \n"; - std::cerr << " i.e.: " << progName << " ../../data/points/human.off 2 10 0.3 --v \n"; - exit(-1); // ----- >> -} - -int main(int argc, char **argv) { - if ((argc != 5) && (argc != 6)) usage(argc, argv[0]); - - using Point = std::vector; - - std::string off_file_name(argv[1]); - int coord = atoi(argv[2]); - int resolution = atoi(argv[3]); - double gain = atof(argv[4]); - bool verb = 0; - if (argc == 6) verb = 1; - - // -------------------------------- - // Init of a Nerve from an OFF file - // -------------------------------- - - Gudhi::cover_complex::Cover_complex SC; - SC.set_verbose(verb); - - bool check = SC.read_point_cloud(off_file_name); - - if (!check) { - std::cout << "Incorrect OFF file." << std::endl; - } else { - SC.set_type("Nerve"); - - SC.set_color_from_coordinate(coord); - SC.set_function_from_coordinate(coord); - - SC.set_graph_from_OFF(); - SC.set_resolution_with_interval_number(resolution); - SC.set_gain(gain); - SC.set_cover_from_function(); - - SC.find_simplices(); - - SC.write_info(); - - Gudhi::Simplex_tree<> stree; - SC.create_complex(stree); - SC.compute_PD(); - - // ---------------------------------------------------------------------------- - // Display information about the graph induced complex - // ---------------------------------------------------------------------------- - - if (verb) { - std::cout << "Nerve is of dimension " << stree.dimension() << " - " << stree.num_simplices() << " simplices - " - << stree.num_vertices() << " vertices." << std::endl; - - std::cout << "Iterator on Nerve simplices" << std::endl; - for (auto f_simplex : stree.filtration_simplex_range()) { - for (auto vertex : stree.simplex_vertex_range(f_simplex)) { - std::cout << vertex << " "; - } - std::cout << std::endl; - } - } - } - - return 0; -} diff --git a/src/Nerve_GIC/example/Nerve.txt b/src/Nerve_GIC/example/Nerve.txt deleted file mode 100644 index 839ff45e..00000000 --- a/src/Nerve_GIC/example/Nerve.txt +++ /dev/null @@ -1,63 +0,0 @@ -Min function value = -0.979672 and Max function value = 0.816414 -Interval 0 = [-0.979672, -0.761576] -Interval 1 = [-0.838551, -0.581967] -Interval 2 = [-0.658942, -0.402359] -Interval 3 = [-0.479334, -0.22275] -Interval 4 = [-0.299725, -0.0431415] -Interval 5 = [-0.120117, 0.136467] -Interval 6 = [0.059492, 0.316076] -Interval 7 = [0.239101, 0.495684] -Interval 8 = [0.418709, 0.675293] -Interval 9 = [0.598318, 0.816414] -Computing preimages... -Computing connected components... -.txt generated. It can be visualized with e.g. python KeplerMapperVisuFromTxtFile.py and firefox. -5 interval(s) in dimension 0: - [-0.909111, 0.00817529] - [-0.171433, 0.367392] - [-0.171433, 0.367392] - [-0.909111, 0.745853] -0 interval(s) in dimension 1: -Nerve is of dimension 1 - 41 simplices - 21 vertices. -Iterator on Nerve simplices -1 -0 -4 -4 0 -2 -2 1 -8 -8 2 -5 -5 4 -9 -9 8 -13 -13 5 -14 -14 9 -19 -19 13 -25 -32 -20 -32 20 -33 -33 25 -26 -26 14 -26 19 -42 -42 26 -34 -34 33 -27 -27 20 -35 -35 27 -35 34 -42 35 -44 -44 35 -54 -54 44 \ No newline at end of file diff --git a/src/Nerve_GIC/example/VoronoiGIC.cpp b/src/Nerve_GIC/example/VoronoiGIC.cpp deleted file mode 100644 index 32431cc2..00000000 --- a/src/Nerve_GIC/example/VoronoiGIC.cpp +++ /dev/null @@ -1,90 +0,0 @@ -/* This file is part of the Gudhi Library. The Gudhi library - * (Geometric Understanding in Higher Dimensions) is a generic C++ - * library for computational topology. - * - * Author(s): Mathieu Carrière - * - * Copyright (C) 2017 INRIA - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include - -#include -#include - -void usage(int nbArgs, char *const progName) { - std::cerr << "Error: Number of arguments (" << nbArgs << ") is not correct\n"; - std::cerr << "Usage: " << progName << " filename.off N [--v] \n"; - std::cerr << " i.e.: " << progName << " ../../data/points/human.off 100 --v \n"; - exit(-1); // ----- >> -} - -int main(int argc, char **argv) { - if ((argc != 3) && (argc != 4)) usage(argc, argv[0]); - - using Point = std::vector; - - std::string off_file_name(argv[1]); - int m = atoi(argv[2]); - bool verb = 0; - if (argc == 4) verb = 1; - - // ---------------------------------------------------------------------------- - // Init of a graph induced complex from an OFF file - // ---------------------------------------------------------------------------- - - Gudhi::cover_complex::Cover_complex GIC; - GIC.set_verbose(verb); - - bool check = GIC.read_point_cloud(off_file_name); - - if (!check) { - std::cout << "Incorrect OFF file." << std::endl; - } else { - GIC.set_type("GIC"); - - GIC.set_color_from_coordinate(); - - GIC.set_graph_from_OFF(); - GIC.set_cover_from_Voronoi(Gudhi::Euclidean_distance(), m); - - GIC.find_simplices(); - - GIC.plot_OFF(); - - Gudhi::Simplex_tree<> stree; - GIC.create_complex(stree); - - // ---------------------------------------------------------------------------- - // Display information about the graph induced complex - // ---------------------------------------------------------------------------- - - if (verb) { - std::cout << "Graph induced complex is of dimension " << stree.dimension() << " - " << stree.num_simplices() - << " simplices - " << stree.num_vertices() << " vertices." << std::endl; - - std::cout << "Iterator on graph induced complex simplices" << std::endl; - for (auto f_simplex : stree.filtration_simplex_range()) { - for (auto vertex : stree.simplex_vertex_range(f_simplex)) { - std::cout << vertex << " "; - } - std::cout << std::endl; - } - } - } - - return 0; -} diff --git a/src/Nerve_GIC/example/km.py b/src/Nerve_GIC/example/km.py deleted file mode 100755 index 53024aab..00000000 --- a/src/Nerve_GIC/example/km.py +++ /dev/null @@ -1,390 +0,0 @@ -from __future__ import division -import numpy as np -from collections import defaultdict -import json -import itertools -from sklearn import cluster, preprocessing, manifold -from datetime import datetime -import sys - -class KeplerMapper(object): - # With this class you can build topological networks from (high-dimensional) data. - # - # 1) Fit a projection/lens/function to a dataset and transform it. - # For instance "mean_of_row(x) for x in X" - # 2) Map this projection with overlapping intervals/hypercubes. - # Cluster the points inside the interval - # (Note: we cluster on the inverse image/original data to lessen projection loss). - # If two clusters/nodes have the same members (due to the overlap), then: - # connect these with an edge. - # 3) Visualize the network using HTML and D3.js. - # - # functions - # --------- - # fit_transform: Create a projection (lens) from a dataset - # map: Apply Mapper algorithm on this projection and build a simplicial complex - # visualize: Turns the complex dictionary into a HTML/D3.js visualization - - def __init__(self, verbose=2): - self.verbose = verbose - - self.chunk_dist = [] - self.overlap_dist = [] - self.d = [] - self.nr_cubes = 0 - self.overlap_perc = 0 - self.clusterer = False - - def fit_transform(self, X, projection="sum", scaler=preprocessing.MinMaxScaler()): - # Creates the projection/lens from X. - # - # Input: X. Input features as a numpy array. - # Output: projected_X. original data transformed to a projection (lens). - # - # parameters - # ---------- - # projection: Projection parameter is either a string, - # a scikit class with fit_transform, like manifold.TSNE(), - # or a list of dimension indices. - # scaler: if None, do no scaling, else apply scaling to the projection - # Default: Min-Max scaling - - self.scaler = scaler - self.projection = str(projection) - - # Detect if projection is a class (for scikit-learn) - #if str(type(projection))[1:6] == "class": #TODO: de-ugly-fy - # reducer = projection - # if self.verbose > 0: - # try: - # projection.set_params(**{"verbose":self.verbose}) - # except: - # pass - # print("\n..Projecting data using: \n\t%s\n"%str(projection)) - # X = reducer.fit_transform(X) - - # Detect if projection is a string (for standard functions) - if isinstance(projection, str): - if self.verbose > 0: - print("\n..Projecting data using: %s"%(projection)) - # Stats lenses - if projection == "sum": # sum of row - X = np.sum(X, axis=1).reshape((X.shape[0],1)) - if projection == "mean": # mean of row - X = np.mean(X, axis=1).reshape((X.shape[0],1)) - if projection == "median": # mean of row - X = np.median(X, axis=1).reshape((X.shape[0],1)) - if projection == "max": # max of row - X = np.max(X, axis=1).reshape((X.shape[0],1)) - if projection == "min": # min of row - X = np.min(X, axis=1).reshape((X.shape[0],1)) - if projection == "std": # std of row - X = np.std(X, axis=1).reshape((X.shape[0],1)) - - if projection == "dist_mean": # Distance of x to mean of X - X_mean = np.mean(X, axis=0) - X = np.sum(np.sqrt((X - X_mean)**2), axis=1).reshape((X.shape[0],1)) - - # Detect if projection is a list (with dimension indices) - if isinstance(projection, list): - if self.verbose > 0: - print("\n..Projecting data using: %s"%(str(projection))) - X = X[:,np.array(projection)] - - # Scaling - if scaler is not None: - if self.verbose > 0: - print("\n..Scaling with: %s\n"%str(scaler)) - X = scaler.fit_transform(X) - - return X - - def map(self, projected_X, inverse_X=None, clusterer=cluster.DBSCAN(eps=0.5,min_samples=3), nr_cubes=10, overlap_perc=0.1): - # This maps the data to a simplicial complex. Returns a dictionary with nodes and links. - # - # Input: projected_X. A Numpy array with the projection/lens. - # Output: complex. A dictionary with "nodes", "links" and "meta information" - # - # parameters - # ---------- - # projected_X projected_X. A Numpy array with the projection/lens. Required. - # inverse_X Numpy array or None. If None then the projection itself is used for clustering. - # clusterer Scikit-learn API compatible clustering algorithm. Default: DBSCAN - # nr_cubes Int. The number of intervals/hypercubes to create. - # overlap_perc Float. The percentage of overlap "between" the intervals/hypercubes. - - start = datetime.now() - - # Helper function - def cube_coordinates_all(nr_cubes, nr_dimensions): - # Helper function to get origin coordinates for our intervals/hypercubes - # Useful for looping no matter the number of cubes or dimensions - # Example: if there are 4 cubes per dimension and 3 dimensions - # return the bottom left (origin) coordinates of 64 hypercubes, - # as a sorted list of Numpy arrays - # TODO: elegance-ify... - l = [] - for x in range(nr_cubes): - l += [x] * nr_dimensions - return [np.array(list(f)) for f in sorted(set(itertools.permutations(l,nr_dimensions)))] - - nodes = defaultdict(list) - links = defaultdict(list) - complex = {} - self.nr_cubes = nr_cubes - self.clusterer = clusterer - self.overlap_perc = overlap_perc - - if self.verbose > 0: - print("Mapping on data shaped %s using dimensions\n"%(str(projected_X.shape))) - - # If inverse image is not provided, we use the projection as the inverse image (suffer projection loss) - if inverse_X is None: - inverse_X = projected_X - - # We chop up the min-max column ranges into 'nr_cubes' parts - self.chunk_dist = (np.max(projected_X, axis=0) - np.min(projected_X, axis=0))/nr_cubes - - # We calculate the overlapping windows distance - self.overlap_dist = self.overlap_perc * self.chunk_dist - - # We find our starting point - self.d = np.min(projected_X, axis=0) - - # Use a dimension index array on the projected X - # (For now this uses the entire dimensionality, but we keep for experimentation) - di = np.array([x for x in range(projected_X.shape[1])]) - - # Prefix'ing the data with ID's - ids = np.array([x for x in range(projected_X.shape[0])]) - projected_X = np.c_[ids,projected_X] - inverse_X = np.c_[ids,inverse_X] - - # Subdivide the projected data X in intervals/hypercubes with overlap - if self.verbose > 0: - total_cubes = len(cube_coordinates_all(nr_cubes,projected_X.shape[1])) - print("Creating %s hypercubes."%total_cubes) - - for i, coor in enumerate(cube_coordinates_all(nr_cubes,di.shape[0])): - # Slice the hypercube - hypercube = projected_X[ np.invert(np.any((projected_X[:,di+1] >= self.d[di] + (coor * self.chunk_dist[di])) & - (projected_X[:,di+1] < self.d[di] + (coor * self.chunk_dist[di]) + self.chunk_dist[di] + self.overlap_dist[di]) == False, axis=1 )) ] - - if self.verbose > 1: - print("There are %s points in cube_%s / %s with starting range %s"% - (hypercube.shape[0],i,total_cubes,self.d[di] + (coor * self.chunk_dist[di]))) - - # If at least one sample inside the hypercube - if hypercube.shape[0] > 0: - # Cluster the data point(s) in the cube, skipping the id-column - # Note that we apply clustering on the inverse image (original data samples) that fall inside the cube. - inverse_x = inverse_X[[int(nn) for nn in hypercube[:,0]]] - - clusterer.fit(inverse_x[:,1:]) - - if self.verbose > 1: - print("Found %s clusters in cube_%s\n"%(np.unique(clusterer.labels_[clusterer.labels_ > -1]).shape[0],i)) - - #Now for every (sample id in cube, predicted cluster label) - for a in np.c_[hypercube[:,0],clusterer.labels_]: - if a[1] != -1: #if not predicted as noise - cluster_id = str(coor[0])+"_"+str(i)+"_"+str(a[1])+"_"+str(coor)+"_"+str(self.d[di] + (coor * self.chunk_dist[di])) # TODO: de-rudimentary-ify - nodes[cluster_id].append( int(a[0]) ) # Append the member id's as integers - else: - if self.verbose > 1: - print("Cube_%s is empty.\n"%(i)) - - # Create links when clusters from different hypercubes have members with the same sample id. - candidates = itertools.combinations(nodes.keys(),2) - for candidate in candidates: - # if there are non-unique members in the union - if len(nodes[candidate[0]]+nodes[candidate[1]]) != len(set(nodes[candidate[0]]+nodes[candidate[1]])): - links[candidate[0]].append( candidate[1] ) - - # Reporting - if self.verbose > 0: - nr_links = 0 - for k in links: - nr_links += len(links[k]) - print("\ncreated %s edges and %s nodes in %s."%(nr_links,len(nodes),str(datetime.now()-start))) - - complex["nodes"] = nodes - complex["links"] = links - complex["meta"] = self.projection - - return complex - - def visualize(self, complex, color_function="", path_html="mapper_visualization_output.html", title="My Data", - graph_link_distance=30, graph_gravity=0.1, graph_charge=-120, custom_tooltips=None, width_html=0, - height_html=0, show_tooltips=True, show_title=True, show_meta=True, res=0,gain=0,minimum=0,maximum=0): - # Turns the dictionary 'complex' in a html file with d3.js - # - # Input: complex. Dictionary (output from calling .map()) - # Output: a HTML page saved as a file in 'path_html'. - # - # parameters - # ---------- - # color_function string. Not fully implemented. Default: "" (distance to origin) - # path_html file path as string. Where to save the HTML page. - # title string. HTML page document title and first heading. - # graph_link_distance int. Edge length. - # graph_gravity float. "Gravity" to center of layout. - # graph_charge int. charge between nodes. - # custom_tooltips None or Numpy Array. You could use "y"-label array for this. - # width_html int. Width of canvas. Default: 0 (full width) - # height_html int. Height of canvas. Default: 0 (full height) - # show_tooltips bool. default:True - # show_title bool. default:True - # show_meta bool. default:True - - # Format JSON for D3 graph - json_s = {} - json_s["nodes"] = [] - json_s["links"] = [] - k2e = {} # a key to incremental int dict, used for id's when linking - - for e, k in enumerate(complex["nodes"]): - # Tooltip and node color formatting, TODO: de-mess-ify - if custom_tooltips is not None: - tooltip_s = "

Cluster %s

"%k + " ".join(str(custom_tooltips[complex["nodes"][k][0]]).split(" ")) - if maximum == minimum: - tooltip_i = 0 - else: - tooltip_i = int(30*(custom_tooltips[complex["nodes"][k][0]]-minimum)/(maximum-minimum)) - json_s["nodes"].append({"name": str(k), "tooltip": tooltip_s, "group": 2 * int(np.log(complex["nodes"][k][2])), "color": tooltip_i}) - else: - tooltip_s = "

Cluster %s

Contains %s members."%(k,len(complex["nodes"][k])) - json_s["nodes"].append({"name": str(k), "tooltip": tooltip_s, "group": 2 * int(np.log(len(complex["nodes"][k]))), "color": str(k.split("_")[0])}) - k2e[k] = e - for k in complex["links"]: - for link in complex["links"][k]: - json_s["links"].append({"source": k2e[k], "target":k2e[link],"value":1}) - - # Width and height of graph in HTML output - if width_html == 0: - width_css = "100%" - width_js = 'document.getElementById("holder").offsetWidth-20' - else: - width_css = "%spx" % width_html - width_js = "%s" % width_html - if height_html == 0: - height_css = "100%" - height_js = 'document.getElementById("holder").offsetHeight-20' - else: - height_css = "%spx" % height_html - height_js = "%s" % height_html - - # Whether to show certain UI elements or not - if show_tooltips == False: - tooltips_display = "display: none;" - else: - tooltips_display = "" - - if show_meta == False: - meta_display = "display: none;" - else: - meta_display = "" - - if show_title == False: - title_display = "display: none;" - else: - title_display = "" - - with open(path_html,"wb") as outfile: - html = """ - - - %s | KeplerMapper - - - -
-

%s

-

- Lens
%s

- Length of intervals
%s

- Overlap percentage
%s%%

- Color Function
%s -

-
- - """%(title,width_css, height_css, title_display, meta_display, tooltips_display, title,complex["meta"],res,gain*100,color_function,width_js,height_js,graph_charge,graph_link_distance,graph_gravity,json.dumps(json_s)) - outfile.write(html.encode("utf-8")) - if self.verbose > 0: - print("\nWrote d3.js graph to '%s'"%path_html) diff --git a/src/Nerve_GIC/example/km.py.COPYRIGHT b/src/Nerve_GIC/example/km.py.COPYRIGHT deleted file mode 100644 index bef7b121..00000000 --- a/src/Nerve_GIC/example/km.py.COPYRIGHT +++ /dev/null @@ -1,26 +0,0 @@ -km.py is a fork of https://github.com/MLWave/kepler-mapper. -Only the visualization part has been kept (Mapper part has been removed). - -This file has te following Copyright : - -The MIT License (MIT) - -Copyright (c) 2015 Triskelion - HJ van Veen - info@mlwave.com - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. diff --git a/src/Nerve_GIC/utilities/CMakeLists.txt b/src/Nerve_GIC/utilities/CMakeLists.txt new file mode 100644 index 00000000..a0508dc2 --- /dev/null +++ b/src/Nerve_GIC/utilities/CMakeLists.txt @@ -0,0 +1,22 @@ +cmake_minimum_required(VERSION 2.6) +project(Nerve_GIC_examples) + +if (NOT CGAL_VERSION VERSION_LESS 4.8.1) + + add_executable ( Nerve Nerve.cpp ) + add_executable ( VoronoiGIC VoronoiGIC.cpp ) + + if (TBB_FOUND) + target_link_libraries(Nerve ${TBB_LIBRARIES}) + target_link_libraries(VoronoiGIC ${TBB_LIBRARIES}) + endif() + + file(COPY KeplerMapperVisuFromTxtFile.py km.py DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/) + + add_test(NAME Nerve_GIC_utilities_nerve COMMAND $ + "${CMAKE_SOURCE_DIR}/data/points/human.off" "2" "10" "0.3") + + add_test(NAME Nerve_GIC_utilities_VoronoiGIC COMMAND $ + "${CMAKE_SOURCE_DIR}/data/points/human.off" "100") + +endif (NOT CGAL_VERSION VERSION_LESS 4.8.1) diff --git a/src/Nerve_GIC/utilities/KeplerMapperVisuFromTxtFile.py b/src/Nerve_GIC/utilities/KeplerMapperVisuFromTxtFile.py new file mode 100755 index 00000000..d2897774 --- /dev/null +++ b/src/Nerve_GIC/utilities/KeplerMapperVisuFromTxtFile.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python + +import km +import numpy as np +from collections import defaultdict + +"""This file is part of the Gudhi Library. The Gudhi library + (Geometric Understanding in Higher Dimensions) is a generic C++ + library for computational topology. + + Author(s): Mathieu Carriere + + Copyright (C) 2017 INRIA + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +""" + +__author__ = "Mathieu Carriere" +__copyright__ = "Copyright (C) 2017 INRIA" +__license__ = "GPL v3" + +network = {} +mapper = km.KeplerMapper(verbose=0) +data = np.zeros((3,3)) +projected_data = mapper.fit_transform( data, projection="sum", scaler=None ) + +f = open('SC.txt','r') +nodes = defaultdict(list) +links = defaultdict(list) +custom = defaultdict(list) + +dat = f.readline() +lens = f.readline() +color = f.readline(); +param = [float(i) for i in f.readline().split(" ")] + +nums = [int(i) for i in f.readline().split(" ")] +num_nodes = nums[0] +num_edges = nums[1] + +for i in range(0,num_nodes): + point = [float(j) for j in f.readline().split(" ")] + nodes[ str(int(point[0])) ] = [ int(point[0]), point[1], int(point[2]) ] + links[ str(int(point[0])) ] = [] + custom[ int(point[0]) ] = point[1] + +m = min([custom[i] for i in range(0,num_nodes)]) +M = max([custom[i] for i in range(0,num_nodes)]) + +for i in range(0,num_edges): + edge = [int(j) for j in f.readline().split(" ")] + links[ str(edge[0]) ].append( str(edge[1]) ) + links[ str(edge[1]) ].append( str(edge[0]) ) + +network["nodes"] = nodes +network["links"] = links +network["meta"] = lens + +mapper.visualize(network, color_function = color, path_html="SC.html", title=dat, +graph_link_distance=30, graph_gravity=0.1, graph_charge=-120, custom_tooltips=custom, width_html=0, +height_html=0, show_tooltips=True, show_title=True, show_meta=True, res=param[0],gain=param[1], minimum=m,maximum=M) diff --git a/src/Nerve_GIC/utilities/Nerve.cpp b/src/Nerve_GIC/utilities/Nerve.cpp new file mode 100644 index 00000000..6abdedc7 --- /dev/null +++ b/src/Nerve_GIC/utilities/Nerve.cpp @@ -0,0 +1,96 @@ +/* This file is part of the Gudhi Library. The Gudhi library + * (Geometric Understanding in Higher Dimensions) is a generic C++ + * library for computational topology. + * + * Author(s): Mathieu Carrière + * + * Copyright (C) 2017 INRIA + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include + +#include +#include + +void usage(int nbArgs, char *const progName) { + std::cerr << "Error: Number of arguments (" << nbArgs << ") is not correct\n"; + std::cerr << "Usage: " << progName << " filename.off coordinate resolution gain [--v] \n"; + std::cerr << " i.e.: " << progName << " ../../data/points/human.off 2 10 0.3 --v \n"; + exit(-1); // ----- >> +} + +int main(int argc, char **argv) { + if ((argc != 5) && (argc != 6)) usage(argc, argv[0]); + + using Point = std::vector; + + std::string off_file_name(argv[1]); + int coord = atoi(argv[2]); + int resolution = atoi(argv[3]); + double gain = atof(argv[4]); + bool verb = 0; + if (argc == 6) verb = 1; + + // -------------------------------- + // Init of a Nerve from an OFF file + // -------------------------------- + + Gudhi::cover_complex::Cover_complex SC; + SC.set_verbose(verb); + + bool check = SC.read_point_cloud(off_file_name); + + if (!check) { + std::cout << "Incorrect OFF file." << std::endl; + } else { + SC.set_type("Nerve"); + + SC.set_color_from_coordinate(coord); + SC.set_function_from_coordinate(coord); + + SC.set_graph_from_OFF(); + SC.set_resolution_with_interval_number(resolution); + SC.set_gain(gain); + SC.set_cover_from_function(); + + SC.find_simplices(); + + SC.write_info(); + + Gudhi::Simplex_tree<> stree; + SC.create_complex(stree); + SC.compute_PD(); + + // ---------------------------------------------------------------------------- + // Display information about the graph induced complex + // ---------------------------------------------------------------------------- + + if (verb) { + std::cout << "Nerve is of dimension " << stree.dimension() << " - " << stree.num_simplices() << " simplices - " + << stree.num_vertices() << " vertices." << std::endl; + + std::cout << "Iterator on Nerve simplices" << std::endl; + for (auto f_simplex : stree.filtration_simplex_range()) { + for (auto vertex : stree.simplex_vertex_range(f_simplex)) { + std::cout << vertex << " "; + } + std::cout << std::endl; + } + } + } + + return 0; +} diff --git a/src/Nerve_GIC/utilities/Nerve.txt b/src/Nerve_GIC/utilities/Nerve.txt new file mode 100644 index 00000000..839ff45e --- /dev/null +++ b/src/Nerve_GIC/utilities/Nerve.txt @@ -0,0 +1,63 @@ +Min function value = -0.979672 and Max function value = 0.816414 +Interval 0 = [-0.979672, -0.761576] +Interval 1 = [-0.838551, -0.581967] +Interval 2 = [-0.658942, -0.402359] +Interval 3 = [-0.479334, -0.22275] +Interval 4 = [-0.299725, -0.0431415] +Interval 5 = [-0.120117, 0.136467] +Interval 6 = [0.059492, 0.316076] +Interval 7 = [0.239101, 0.495684] +Interval 8 = [0.418709, 0.675293] +Interval 9 = [0.598318, 0.816414] +Computing preimages... +Computing connected components... +.txt generated. It can be visualized with e.g. python KeplerMapperVisuFromTxtFile.py and firefox. +5 interval(s) in dimension 0: + [-0.909111, 0.00817529] + [-0.171433, 0.367392] + [-0.171433, 0.367392] + [-0.909111, 0.745853] +0 interval(s) in dimension 1: +Nerve is of dimension 1 - 41 simplices - 21 vertices. +Iterator on Nerve simplices +1 +0 +4 +4 0 +2 +2 1 +8 +8 2 +5 +5 4 +9 +9 8 +13 +13 5 +14 +14 9 +19 +19 13 +25 +32 +20 +32 20 +33 +33 25 +26 +26 14 +26 19 +42 +42 26 +34 +34 33 +27 +27 20 +35 +35 27 +35 34 +42 35 +44 +44 35 +54 +54 44 \ No newline at end of file diff --git a/src/Nerve_GIC/utilities/VoronoiGIC.cpp b/src/Nerve_GIC/utilities/VoronoiGIC.cpp new file mode 100644 index 00000000..32431cc2 --- /dev/null +++ b/src/Nerve_GIC/utilities/VoronoiGIC.cpp @@ -0,0 +1,90 @@ +/* This file is part of the Gudhi Library. The Gudhi library + * (Geometric Understanding in Higher Dimensions) is a generic C++ + * library for computational topology. + * + * Author(s): Mathieu Carrière + * + * Copyright (C) 2017 INRIA + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include + +#include +#include + +void usage(int nbArgs, char *const progName) { + std::cerr << "Error: Number of arguments (" << nbArgs << ") is not correct\n"; + std::cerr << "Usage: " << progName << " filename.off N [--v] \n"; + std::cerr << " i.e.: " << progName << " ../../data/points/human.off 100 --v \n"; + exit(-1); // ----- >> +} + +int main(int argc, char **argv) { + if ((argc != 3) && (argc != 4)) usage(argc, argv[0]); + + using Point = std::vector; + + std::string off_file_name(argv[1]); + int m = atoi(argv[2]); + bool verb = 0; + if (argc == 4) verb = 1; + + // ---------------------------------------------------------------------------- + // Init of a graph induced complex from an OFF file + // ---------------------------------------------------------------------------- + + Gudhi::cover_complex::Cover_complex GIC; + GIC.set_verbose(verb); + + bool check = GIC.read_point_cloud(off_file_name); + + if (!check) { + std::cout << "Incorrect OFF file." << std::endl; + } else { + GIC.set_type("GIC"); + + GIC.set_color_from_coordinate(); + + GIC.set_graph_from_OFF(); + GIC.set_cover_from_Voronoi(Gudhi::Euclidean_distance(), m); + + GIC.find_simplices(); + + GIC.plot_OFF(); + + Gudhi::Simplex_tree<> stree; + GIC.create_complex(stree); + + // ---------------------------------------------------------------------------- + // Display information about the graph induced complex + // ---------------------------------------------------------------------------- + + if (verb) { + std::cout << "Graph induced complex is of dimension " << stree.dimension() << " - " << stree.num_simplices() + << " simplices - " << stree.num_vertices() << " vertices." << std::endl; + + std::cout << "Iterator on graph induced complex simplices" << std::endl; + for (auto f_simplex : stree.filtration_simplex_range()) { + for (auto vertex : stree.simplex_vertex_range(f_simplex)) { + std::cout << vertex << " "; + } + std::cout << std::endl; + } + } + } + + return 0; +} diff --git a/src/Nerve_GIC/utilities/km.py b/src/Nerve_GIC/utilities/km.py new file mode 100755 index 00000000..53024aab --- /dev/null +++ b/src/Nerve_GIC/utilities/km.py @@ -0,0 +1,390 @@ +from __future__ import division +import numpy as np +from collections import defaultdict +import json +import itertools +from sklearn import cluster, preprocessing, manifold +from datetime import datetime +import sys + +class KeplerMapper(object): + # With this class you can build topological networks from (high-dimensional) data. + # + # 1) Fit a projection/lens/function to a dataset and transform it. + # For instance "mean_of_row(x) for x in X" + # 2) Map this projection with overlapping intervals/hypercubes. + # Cluster the points inside the interval + # (Note: we cluster on the inverse image/original data to lessen projection loss). + # If two clusters/nodes have the same members (due to the overlap), then: + # connect these with an edge. + # 3) Visualize the network using HTML and D3.js. + # + # functions + # --------- + # fit_transform: Create a projection (lens) from a dataset + # map: Apply Mapper algorithm on this projection and build a simplicial complex + # visualize: Turns the complex dictionary into a HTML/D3.js visualization + + def __init__(self, verbose=2): + self.verbose = verbose + + self.chunk_dist = [] + self.overlap_dist = [] + self.d = [] + self.nr_cubes = 0 + self.overlap_perc = 0 + self.clusterer = False + + def fit_transform(self, X, projection="sum", scaler=preprocessing.MinMaxScaler()): + # Creates the projection/lens from X. + # + # Input: X. Input features as a numpy array. + # Output: projected_X. original data transformed to a projection (lens). + # + # parameters + # ---------- + # projection: Projection parameter is either a string, + # a scikit class with fit_transform, like manifold.TSNE(), + # or a list of dimension indices. + # scaler: if None, do no scaling, else apply scaling to the projection + # Default: Min-Max scaling + + self.scaler = scaler + self.projection = str(projection) + + # Detect if projection is a class (for scikit-learn) + #if str(type(projection))[1:6] == "class": #TODO: de-ugly-fy + # reducer = projection + # if self.verbose > 0: + # try: + # projection.set_params(**{"verbose":self.verbose}) + # except: + # pass + # print("\n..Projecting data using: \n\t%s\n"%str(projection)) + # X = reducer.fit_transform(X) + + # Detect if projection is a string (for standard functions) + if isinstance(projection, str): + if self.verbose > 0: + print("\n..Projecting data using: %s"%(projection)) + # Stats lenses + if projection == "sum": # sum of row + X = np.sum(X, axis=1).reshape((X.shape[0],1)) + if projection == "mean": # mean of row + X = np.mean(X, axis=1).reshape((X.shape[0],1)) + if projection == "median": # mean of row + X = np.median(X, axis=1).reshape((X.shape[0],1)) + if projection == "max": # max of row + X = np.max(X, axis=1).reshape((X.shape[0],1)) + if projection == "min": # min of row + X = np.min(X, axis=1).reshape((X.shape[0],1)) + if projection == "std": # std of row + X = np.std(X, axis=1).reshape((X.shape[0],1)) + + if projection == "dist_mean": # Distance of x to mean of X + X_mean = np.mean(X, axis=0) + X = np.sum(np.sqrt((X - X_mean)**2), axis=1).reshape((X.shape[0],1)) + + # Detect if projection is a list (with dimension indices) + if isinstance(projection, list): + if self.verbose > 0: + print("\n..Projecting data using: %s"%(str(projection))) + X = X[:,np.array(projection)] + + # Scaling + if scaler is not None: + if self.verbose > 0: + print("\n..Scaling with: %s\n"%str(scaler)) + X = scaler.fit_transform(X) + + return X + + def map(self, projected_X, inverse_X=None, clusterer=cluster.DBSCAN(eps=0.5,min_samples=3), nr_cubes=10, overlap_perc=0.1): + # This maps the data to a simplicial complex. Returns a dictionary with nodes and links. + # + # Input: projected_X. A Numpy array with the projection/lens. + # Output: complex. A dictionary with "nodes", "links" and "meta information" + # + # parameters + # ---------- + # projected_X projected_X. A Numpy array with the projection/lens. Required. + # inverse_X Numpy array or None. If None then the projection itself is used for clustering. + # clusterer Scikit-learn API compatible clustering algorithm. Default: DBSCAN + # nr_cubes Int. The number of intervals/hypercubes to create. + # overlap_perc Float. The percentage of overlap "between" the intervals/hypercubes. + + start = datetime.now() + + # Helper function + def cube_coordinates_all(nr_cubes, nr_dimensions): + # Helper function to get origin coordinates for our intervals/hypercubes + # Useful for looping no matter the number of cubes or dimensions + # Example: if there are 4 cubes per dimension and 3 dimensions + # return the bottom left (origin) coordinates of 64 hypercubes, + # as a sorted list of Numpy arrays + # TODO: elegance-ify... + l = [] + for x in range(nr_cubes): + l += [x] * nr_dimensions + return [np.array(list(f)) for f in sorted(set(itertools.permutations(l,nr_dimensions)))] + + nodes = defaultdict(list) + links = defaultdict(list) + complex = {} + self.nr_cubes = nr_cubes + self.clusterer = clusterer + self.overlap_perc = overlap_perc + + if self.verbose > 0: + print("Mapping on data shaped %s using dimensions\n"%(str(projected_X.shape))) + + # If inverse image is not provided, we use the projection as the inverse image (suffer projection loss) + if inverse_X is None: + inverse_X = projected_X + + # We chop up the min-max column ranges into 'nr_cubes' parts + self.chunk_dist = (np.max(projected_X, axis=0) - np.min(projected_X, axis=0))/nr_cubes + + # We calculate the overlapping windows distance + self.overlap_dist = self.overlap_perc * self.chunk_dist + + # We find our starting point + self.d = np.min(projected_X, axis=0) + + # Use a dimension index array on the projected X + # (For now this uses the entire dimensionality, but we keep for experimentation) + di = np.array([x for x in range(projected_X.shape[1])]) + + # Prefix'ing the data with ID's + ids = np.array([x for x in range(projected_X.shape[0])]) + projected_X = np.c_[ids,projected_X] + inverse_X = np.c_[ids,inverse_X] + + # Subdivide the projected data X in intervals/hypercubes with overlap + if self.verbose > 0: + total_cubes = len(cube_coordinates_all(nr_cubes,projected_X.shape[1])) + print("Creating %s hypercubes."%total_cubes) + + for i, coor in enumerate(cube_coordinates_all(nr_cubes,di.shape[0])): + # Slice the hypercube + hypercube = projected_X[ np.invert(np.any((projected_X[:,di+1] >= self.d[di] + (coor * self.chunk_dist[di])) & + (projected_X[:,di+1] < self.d[di] + (coor * self.chunk_dist[di]) + self.chunk_dist[di] + self.overlap_dist[di]) == False, axis=1 )) ] + + if self.verbose > 1: + print("There are %s points in cube_%s / %s with starting range %s"% + (hypercube.shape[0],i,total_cubes,self.d[di] + (coor * self.chunk_dist[di]))) + + # If at least one sample inside the hypercube + if hypercube.shape[0] > 0: + # Cluster the data point(s) in the cube, skipping the id-column + # Note that we apply clustering on the inverse image (original data samples) that fall inside the cube. + inverse_x = inverse_X[[int(nn) for nn in hypercube[:,0]]] + + clusterer.fit(inverse_x[:,1:]) + + if self.verbose > 1: + print("Found %s clusters in cube_%s\n"%(np.unique(clusterer.labels_[clusterer.labels_ > -1]).shape[0],i)) + + #Now for every (sample id in cube, predicted cluster label) + for a in np.c_[hypercube[:,0],clusterer.labels_]: + if a[1] != -1: #if not predicted as noise + cluster_id = str(coor[0])+"_"+str(i)+"_"+str(a[1])+"_"+str(coor)+"_"+str(self.d[di] + (coor * self.chunk_dist[di])) # TODO: de-rudimentary-ify + nodes[cluster_id].append( int(a[0]) ) # Append the member id's as integers + else: + if self.verbose > 1: + print("Cube_%s is empty.\n"%(i)) + + # Create links when clusters from different hypercubes have members with the same sample id. + candidates = itertools.combinations(nodes.keys(),2) + for candidate in candidates: + # if there are non-unique members in the union + if len(nodes[candidate[0]]+nodes[candidate[1]]) != len(set(nodes[candidate[0]]+nodes[candidate[1]])): + links[candidate[0]].append( candidate[1] ) + + # Reporting + if self.verbose > 0: + nr_links = 0 + for k in links: + nr_links += len(links[k]) + print("\ncreated %s edges and %s nodes in %s."%(nr_links,len(nodes),str(datetime.now()-start))) + + complex["nodes"] = nodes + complex["links"] = links + complex["meta"] = self.projection + + return complex + + def visualize(self, complex, color_function="", path_html="mapper_visualization_output.html", title="My Data", + graph_link_distance=30, graph_gravity=0.1, graph_charge=-120, custom_tooltips=None, width_html=0, + height_html=0, show_tooltips=True, show_title=True, show_meta=True, res=0,gain=0,minimum=0,maximum=0): + # Turns the dictionary 'complex' in a html file with d3.js + # + # Input: complex. Dictionary (output from calling .map()) + # Output: a HTML page saved as a file in 'path_html'. + # + # parameters + # ---------- + # color_function string. Not fully implemented. Default: "" (distance to origin) + # path_html file path as string. Where to save the HTML page. + # title string. HTML page document title and first heading. + # graph_link_distance int. Edge length. + # graph_gravity float. "Gravity" to center of layout. + # graph_charge int. charge between nodes. + # custom_tooltips None or Numpy Array. You could use "y"-label array for this. + # width_html int. Width of canvas. Default: 0 (full width) + # height_html int. Height of canvas. Default: 0 (full height) + # show_tooltips bool. default:True + # show_title bool. default:True + # show_meta bool. default:True + + # Format JSON for D3 graph + json_s = {} + json_s["nodes"] = [] + json_s["links"] = [] + k2e = {} # a key to incremental int dict, used for id's when linking + + for e, k in enumerate(complex["nodes"]): + # Tooltip and node color formatting, TODO: de-mess-ify + if custom_tooltips is not None: + tooltip_s = "

Cluster %s

"%k + " ".join(str(custom_tooltips[complex["nodes"][k][0]]).split(" ")) + if maximum == minimum: + tooltip_i = 0 + else: + tooltip_i = int(30*(custom_tooltips[complex["nodes"][k][0]]-minimum)/(maximum-minimum)) + json_s["nodes"].append({"name": str(k), "tooltip": tooltip_s, "group": 2 * int(np.log(complex["nodes"][k][2])), "color": tooltip_i}) + else: + tooltip_s = "

Cluster %s

Contains %s members."%(k,len(complex["nodes"][k])) + json_s["nodes"].append({"name": str(k), "tooltip": tooltip_s, "group": 2 * int(np.log(len(complex["nodes"][k]))), "color": str(k.split("_")[0])}) + k2e[k] = e + for k in complex["links"]: + for link in complex["links"][k]: + json_s["links"].append({"source": k2e[k], "target":k2e[link],"value":1}) + + # Width and height of graph in HTML output + if width_html == 0: + width_css = "100%" + width_js = 'document.getElementById("holder").offsetWidth-20' + else: + width_css = "%spx" % width_html + width_js = "%s" % width_html + if height_html == 0: + height_css = "100%" + height_js = 'document.getElementById("holder").offsetHeight-20' + else: + height_css = "%spx" % height_html + height_js = "%s" % height_html + + # Whether to show certain UI elements or not + if show_tooltips == False: + tooltips_display = "display: none;" + else: + tooltips_display = "" + + if show_meta == False: + meta_display = "display: none;" + else: + meta_display = "" + + if show_title == False: + title_display = "display: none;" + else: + title_display = "" + + with open(path_html,"wb") as outfile: + html = """ + + + %s | KeplerMapper + + + +
+

%s

+

+ Lens
%s

+ Length of intervals
%s

+ Overlap percentage
%s%%

+ Color Function
%s +

+
+ + """%(title,width_css, height_css, title_display, meta_display, tooltips_display, title,complex["meta"],res,gain*100,color_function,width_js,height_js,graph_charge,graph_link_distance,graph_gravity,json.dumps(json_s)) + outfile.write(html.encode("utf-8")) + if self.verbose > 0: + print("\nWrote d3.js graph to '%s'"%path_html) diff --git a/src/Nerve_GIC/utilities/km.py.COPYRIGHT b/src/Nerve_GIC/utilities/km.py.COPYRIGHT new file mode 100644 index 00000000..bef7b121 --- /dev/null +++ b/src/Nerve_GIC/utilities/km.py.COPYRIGHT @@ -0,0 +1,26 @@ +km.py is a fork of https://github.com/MLWave/kepler-mapper. +Only the visualization part has been kept (Mapper part has been removed). + +This file has te following Copyright : + +The MIT License (MIT) + +Copyright (c) 2015 Triskelion - HJ van Veen - info@mlwave.com + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. -- cgit v1.2.3