# -*- coding: utf-8 -*-
"""
Various functions that can be useful
"""
import multiprocessing
import time
from functools import reduce  # not a builtin on Python 3; also valid on 2.6+

import numpy as np
from scipy.spatial.distance import cdist

# Reference timestamp shared by tic(), toc() and toq().
__time_tic_toc = time.time()


def tic():
    """ Python implementation of Matlab tic() function

    Stores the current time as the reference used by toc() and toq().
    """
    global __time_tic_toc
    __time_tic_toc = time.time()


def toc(message='Elapsed time : {} s'):
    """ Python implementation of Matlab toc() function

    Parameters
    ----------
    message : str, optional
        format string with one ``{}`` placeholder that receives the elapsed
        time in seconds

    Returns
    -------
    t : float
        time in seconds elapsed since the last call to tic() (or since the
        module was imported if tic() was never called)
    """
    elapsed = time.time() - __time_tic_toc
    print(message.format(elapsed))
    return elapsed


def toq():
    """ Python implementation of Julia toq() function

    Same as toc() but does not print anything.

    Returns
    -------
    t : float
        time in seconds elapsed since the last call to tic()
    """
    return time.time() - __time_tic_toc


def kernel(x1, x2, method='gaussian', sigma=1, **kwargs):
    """Compute kernel matrix

    Parameters
    ----------
    x1 : np.array (n1,d)
        matrix with n1 samples of size d
    x2 : np.array (n2,d)
        matrix with n2 samples of size d
    method : str, optional
        kernel name (case insensitive); only the Gaussian kernel is
        implemented, selected by 'gaussian', 'gauss' or 'rbf'
    sigma : float, optional
        bandwidth of the Gaussian kernel
    **kwargs
        accepted for interface compatibility, currently unused

    Returns
    -------
    K : np.array (n1,n2)
        kernel matrix exp(-d(x1_i,x2_j)/(2*sigma**2)) where d is the squared
        Euclidean distance

    Raises
    ------
    ValueError
        if method is not a supported kernel name
    """
    if method.lower() in ('gaussian', 'gauss', 'rbf'):
        return np.exp(-dist(x1, x2) / (2 * sigma ** 2))
    # Previously an unknown method fell through to an unbound local variable.
    raise ValueError("Unknown kernel method: {!r}".format(method))


def unif(n):
    """ return a uniform histogram of length n (simplex)

    Parameters
    ----------
    n : int
        number of bins in the histogram

    Returns
    -------
    h : np.array (n,)
        histogram of length n such that h_i=1/n for all i
    """
    return np.ones((n,)) / n


def clean_zeros(a, b, M):
    """ Remove all components with zero weights in a and b

    Parameters
    ----------
    a : np.array (n1,)
        weights associated with the rows of M
    b : np.array (n2,)
        weights associated with the columns of M
    M : np.array (n1,n2)
        matrix to be filtered

    Returns
    -------
    a2 : np.array
        entries of a that are strictly positive
    b2 : np.array
        entries of b that are strictly positive
    M2 : np.array
        copy of M restricted to the rows where a>0 and the columns where b>0
    """
    # copy() returns a fresh C-ordered array instead of an indexed result
    # (presumably required by the EMD solver -- confirm against caller)
    M2 = M[a > 0, :][:, b > 0].copy()
    a2 = a[a > 0]
    b2 = b[b > 0]
    return a2, b2, M2


def dist(x1, x2=None, metric='sqeuclidean'):
    """Compute distance between samples in x1 and x2 using function
    scipy.spatial.distance.cdist

    Parameters
    ----------
    x1 : np.array (n1,d)
        matrix with n1 samples of size d
    x2 : np.array (n2,d), optional
        matrix with n2 samples of size d (if None then x2=x1)
    metric : str, fun, optional
        name of the metric to be computed (full list in the doc of scipy).
        If a string, the distance function can be 'braycurtis', 'canberra',
        'chebyshev', 'cityblock', 'correlation', 'cosine', 'dice',
        'euclidean', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis',
        'matching', 'minkowski', 'rogerstanimoto', 'russellrao',
        'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',
        'wminkowski', 'yule' (availability depends on the installed scipy
        version).

    Returns
    -------
    M : np.array (n1,n2)
        distance matrix computed with given metric
    """
    if x2 is None:
        x2 = x1
    return cdist(x1, x2, metric=metric)


def dist0(n, method='lin_square'):
    """Compute standard cost matrices of size (n,n) for OT problems

    Parameters
    ----------
    n : int
        size of the cost matrix
    method : str, optional
        Type of loss matrix chosen from:

        * 'lin_square' : linear sampling between 0 and n-1, quadratic loss

    Returns
    -------
    M : np.array (n,n)
        distance matrix computed with given metric; the scalar 0 is returned
        when method is not recognized
    """
    res = 0
    if method == 'lin_square':
        x = np.arange(n, dtype=np.float64).reshape((n, 1))
        res = dist(x, x)
    return res


def dots(*args):
    """ dots function for multiple matrix multiply

    Computes np.dot(...np.dot(np.dot(args[0], args[1]), args[2])..., args[-1])
    from left to right.
    """
    return reduce(np.dot, args)


def fun(f, q_in, q_out):
    """ Utility function for parmap with no serializing problems

    Worker loop: reads (index, value) pairs from q_in, writes
    (index, f(value)) to q_out, and stops when the index is None.
    """
    while True:
        i, x = q_in.get()
        if i is None:
            break
        q_out.put((i, f(x)))


def parmap(f, X, nprocs=multiprocessing.cpu_count()):
    """ parallel map for multiprocessing

    Parameters
    ----------
    f : callable
        function applied to every element of X
    X : iterable
        elements to process
    nprocs : int, optional
        number of worker processes (defaults to the CPU count read when the
        module is imported)

    Returns
    -------
    res : list
        values f(x) for x in X, in the same order as X
    """
    q_in = multiprocessing.Queue(1)
    q_out = multiprocessing.Queue()

    workers = [multiprocessing.Process(target=fun, args=(f, q_in, q_out))
               for _ in range(nprocs)]
    for p in workers:
        p.daemon = True
        p.start()

    # Feed the jobs, counting them so that X may be any iterable.
    n_sent = 0
    for i, x in enumerate(X):
        q_in.put((i, x))
        n_sent += 1
    # One stop marker per worker.
    for _ in range(nprocs):
        q_in.put((None, None))

    # Collect every result before joining the workers.
    res = [q_out.get() for _ in range(n_sent)]
    for p in workers:
        p.join()

    # Results arrive in completion order; restore the input order.
    return [x for i, x in sorted(res, key=lambda r: r[0])]