diff options
author | KatolaZ <katolaz@yahoo.it> | 2015-04-23 15:29:46 +0100 |
---|---|---|
committer | KatolaZ <katolaz@yahoo.it> | 2015-04-23 15:29:46 +0100 |
commit | 36c4a380493cf29bc1b83635e86383a97cae7dd9 (patch) | |
tree | 777ad864a92325f89e9fd552716f48587bafa8f6 /python/multired.py | |
parent | e0b97ed43f8da81ee5865d151ebad7e1d60a55bd (diff) |
Version 0.1
Diffstat (limited to 'python/multired.py')
-rw-r--r-- | python/multired.py | 487 |
1 files changed, 487 insertions, 0 deletions
diff --git a/python/multired.py b/python/multired.py new file mode 100644 index 0000000..7482877 --- /dev/null +++ b/python/multired.py @@ -0,0 +1,487 @@ +import sys +import math +import numpy as np +from scipy.sparse import csr_matrix, eye +from scipy.linalg import eigh, eig +import copy +from scipy.cluster.hierarchy import linkage, dendrogram + +has_matplotlib = False + +try: + import matplotlib + has_matplotlib = True + +except ImportError: + has_matplotlib = False + + +class XLogx_fit: + def __init__(self, degree, npoints= 100, xmax=1): + if xmax > 1: + xmax = 1 + self.degree = degree + x = np.linspace(0, xmax, npoints) + y = [i * math.log(i) for i in x[1:]] + y.insert(0, 0) + self.fit = np.polyfit(x, y, degree) + + def __getitem__ (self, index): + if index <= self.degree: + return self.fit[index] + else: + print "Error!!! Index %d is larger than the degree of the fitting polynomial (%d)" \ + % (index, degree) + sys.exit(-1) + + +class layer: + def __init__ (self, layerfile= None, matrix=None): + self.N = 0 + self.num_layer = -1 + self.fname = layerfile + self.adj_matr = None + self.laplacian = None + self.resc_laplacian = None + self.entropy = None + self.entropy_approx = None + self._ii = [] + self._jj = [] + self._ww = [] + self._matrix_called = False + if layerfile != None: + try: + min_N = 10e10 + with open(layerfile, "r") as lines: + for l in lines: + if l[0] == '#': + continue + elems = l.strip(" \n").split(" ") + s = int(elems[0]) + d = int(elems[1]) + self._ii.append(s) + self._jj.append(d) + if s > self.N: + self.N = s + if d > self.N: + self.N = d + if s < min_N: + min_N = s + if d < min_N: + min_N = d + if len(elems) >2 : ## A weight is specified + val = [float(x) if "e" in x or "." in x else int(x) for x in [elems[2]]][0] + self._ww.append(float(val)) + else: + self._ww.append(int(1)) + + except (IOError): + print "Unable to find/open file %s -- Exiting!!!" % layerfile + exit(-2) + elif matrix != None: + self.adj_matr = copy.copy(matrix) + self.N, _x = matrix.shape + K = np.multiply(self.adj_matr.sum(0), np.ones((self.N,self.N))) + D = np.diag(np.diag(K)) + self.laplacian = csr_matrix(D - self.adj_matr) + K = self.laplacian.diagonal().sum() + self.resc_laplacian = csr_matrix(self.laplacian / K) + self._matrix_called = True + else: + print "The given matrix is BLANK" + def make_matrices(self, N): + self.N = N + self.adj_matr = csr_matrix((self._ww, (self._ii, self._jj)), shape=(self.N, self.N)) + self.adj_matr = self.adj_matr + self.adj_matr.transpose() + K = np.multiply(self.adj_matr.sum(0), np.ones((self.N,self.N))) + D = np.diag(np.diag(K)) + self.laplacian = csr_matrix(D - self.adj_matr) + K = self.laplacian.diagonal().sum() + self.resc_laplacian = csr_matrix(self.laplacian / K) + self._matrix_called = True + + def dump_info(self): + N, M = self.adj_matr.shape + K = self.adj_matr.nnz + sys.stderr.write("Layer File: %s\nNodes: %d Edges: %d\nEntropy: %g Approx. Entropy: %g\n" % \ + (self.fname, N, K, self.entropy, self.entropy_approx) ) + + def compute_VN_entropy(self): + eigvals = eigh(self.resc_laplacian.todense()) + + self.entropy = 0 + for l_i in eigvals[0]: + if (l_i > 10e-20): + self.entropy -= l_i * math.log (l_i) + + + def compute_VN_entropy_approx(self, poly): + p = poly.degree + h = - poly[p] * self.N + M = csr_matrix(np.eye(self.N)) + for i in range(p-1, -1, -1): + M = M * self.resc_laplacian + h += - poly[i] * sum(M.diagonal()) + self.entropy_approx = h + + def aggregate(self, other_layer): + if self.adj_matr != None: + self.adj_matr = self.adj_matr + other_layer.adj_matr + else: + self.adj_matr = copy.copy(other_layer.adj_matr) + K = np.multiply(self.adj_matr.sum(0), np.ones((self.N,self.N))) + D = np.diag(np.diag(K)) + self.laplacian = csr_matrix(D - self.adj_matr) + K = self.laplacian.diagonal().sum() + self.resc_laplacian = csr_matrix(self.laplacian / K) + self._matrix_called = True + + + +class multiplex_red: + + def __init__ (self, multiplexfile, directed = None, fit_degree=10, verbose=False): + self.layers = [] + self.N = 0 + self.M = 0 + self.entropy = 0 + self.entropy_approx = 0 + self.JSD = None + self.JSD_approx = None + self.Z = None + self.Z_approx = None + self.aggr = None + self.q_vals = None + self.q_vals_approx = None + self.fit_degree = fit_degree + self.poly = XLogx_fit(self.fit_degree) + self.verb = verbose + self.cuts = None + self.cuts_approx = None + try: + with open(multiplexfile, "r") as lines: + for l in lines: + if (self.verb): + sys.stderr.write("Loading layer %d from file %s" % (len(self.layers), l)) + A = layer(l.strip(" \n")) + if A.N > self.N: + self.N = A.N+1 + self.layers.append(A) + n = 0 + for l in self.layers: + l.make_matrices(self.N) + l.num_layer = n + n += 1 + self.M = len(self.layers) + except ( IOError): + print "Unable to find/open file %s -- Exiting!!!" % layer_file + exit(-2) + + def dump_info(self): + i = 0 + for l in self.layers: + sys.stderr.write("--------\nLayer: %d\n" % i) + l.dump_info() + i += 1 + + + def compute_aggregated(self): + self.aggr = copy.copy(self.layers[0]) + self.aggr.entropy = 0 + self.aggr.entropy_approx = 0 + for l in self.layers[1:]: + self.aggr.aggregate(l) + + def compute_layer_entropies(self): + for l in self.layers: + l.compute_VN_entropy() + + def compute_layer_entropies_approx(self): + for l in self.layers: + l.compute_VN_entropy_approx(self.poly) + + + def compute_multiplex_entropy(self, force_compute=False): + ### The entropy of a multiplex is defined as the sum of the entropies of its layers + for l in self.layers: + if l.entropy == None: + l.compute_VN_entropy() + self.entropy += l.entropy + + def compute_multiplex_entropy_approx(self, force_compute=False): + ### The entropy of a multiplex is defined as the sum of the entropies of its layers + for l in self.layers: + if l.entropy_approx == None: + l.compute_VN_entropy_approx(self.poly) + self.entropy_approx += l.entropy_approx + + def compute_JSD_matrix(self): + if (self.verb): + sys.stderr.write("Computing JSD matrix\n") + self.JSD = np.zeros((self.M, self.M)) + for i in range(len(self.layers)): + for j in range(i+1, len(self.layers)): + li = self.layers[i] + lj = self.layers[j] + if not li.entropy: + li.compute_VN_entropy() + if not lj.entropy: + lj.compute_VN_entropy() + # m_sigma = (li.resc_laplacian + lj.resc_laplacian)/2.0 + # m_sigma_entropy = mr.compute_VN_entropy_LR(m_sigma) + m_sigma_matr = (li.adj_matr + lj.adj_matr)/2.0 + m_sigma = layer(matrix=m_sigma_matr) + m_sigma.compute_VN_entropy() + d = m_sigma.entropy - 0.5 * (li.entropy + lj.entropy) + d = math.sqrt(d) + self.JSD[i][j] = d + self.JSD[j][i] = d + pass + + def compute_JSD_matrix_approx(self): + if (self.verb): + sys.stderr.write("Computing JSD matrix (approx)\n") + self.JSD_approx = np.zeros((self.M, self.M)) + for i in range(len(self.layers)): + for j in range(i+1, len(self.layers)): + li = self.layers[i] + lj = self.layers[j] + if not li.entropy_approx: + li.compute_VN_entropy_approx(self.poly) + if not lj.entropy_approx: + lj.compute_VN_entropy_approx(self.poly) + m_sigma_matr = (li.adj_matr + lj.adj_matr)/2.0 + m_sigma = layer(matrix=m_sigma_matr) + m_sigma.compute_VN_entropy_approx(self.poly) + d = m_sigma.entropy_approx - 0.5 * (li.entropy_approx + lj.entropy_approx) + d = math.sqrt(d) + self.JSD_approx[i][j] = d + self.JSD_approx[j][i] = d + + def dump_JSD(self, force_compute=False): + if self.JSD == None: + if force_compute: + self.compute_JSD_matrix() + else: + print "Error!!! call to dump_JSD but JSD matrix has not been computed!!!" + sys.exit(1) + idx = 0 + for i in range(self.len): + for j in range(i+1, self.len): + print i, j, self.JSD[idx] + idx += 1 + + def dump_JSD_approx(self, force_compute=False): + if self.JSD_approx == None: + if force_compute: + self.compute_JSD_matrix_approx() + else: + print "Error!!! call to dump_JSD_approx but JSD approximate matrix has not been computed!!!" + sys.exit(1) + idx = 0 + for i in range(self.M): + for j in range(i+1, self.M): + print i, j, self.JSD_approx[idx] + idx += 1 + + + def reduce(self, method="ward"): + if (self.verb): + sys.stderr.write("Performing '%s' reduction\n" % method) + if self.JSD == None: + self.compute_JSD_matrix() + self.Z = linkage(self.JSD, method=method) + return self.Z + + def reduce_approx(self, method="ward"): + if (self.verb): + sys.stderr.write("Performing '%s' reduction (approx)\n" % method) + if self.JSD_approx == None: + self.compute_JSD_matrix_approx() + self.Z_approx = linkage(self.JSD_approx, method=method) + return self.Z_approx + + def get_linkage(self): + return self.Z + + def get_linkage_approx(self): + return self.Z_approx + + def __compute_q(self, layers): + H_avg = 0 + if not self.aggr: + self.compute_aggregated() + self.aggr.compute_VN_entropy() + for l in layers: + if not l.entropy: + l.compute_VN_entropy() + H_avg += l.entropy + H_avg /= len(layers) + q = 1.0 - H_avg / self.aggr.entropy + return q + + def get_q_profile(self): + mylayers = copy.copy(self.layers) + rem_layers = copy.copy(self.layers) + q_vals = [] + if self.Z == None: + self.reduce() + q = self.__compute_q(rem_layers) + q_vals.append(q) + n = len(self.layers) + for l1, l2, _d, _x in self.Z: + l_new = layer(matrix=mylayers[int(l1)].adj_matr) + l_new.num_layer = n + n += 1 + l_new.aggregate(mylayers[int(l2)]) + rem_layers.remove(mylayers[int(l1)]) + rem_layers.remove(mylayers[int(l2)]) + rem_layers.append(l_new) + mylayers.append(l_new) + q = self.__compute_q(rem_layers) + q_vals.append(q) + self.q_vals = q_vals + return q_vals + pass + + + def __compute_q_approx(self, layers): + H_avg = 0 + if not self.aggr: + self.compute_aggregated() + self.aggr.compute_VN_entropy_approx(self.poly) + for l in layers: + if not l.entropy_approx: + l.compute_VN_entropy_approx(self.poly) + H_avg += l.entropy_approx + H_avg /= len(layers) + q = 1.0 - H_avg / self.aggr.entropy_approx + return q + + def get_q_profile_approx(self): + mylayers = copy.copy(self.layers) + rem_layers = copy.copy(self.layers) + q_vals = [] + if self.Z_approx == None: + self.reduce_approx() + q = self.__compute_q_approx(rem_layers) + q_vals.append(q) + n = len(self.layers) + for l1, l2, _d, _x in self.Z_approx: + l_new = layer(matrix=mylayers[int(l1)].adj_matr) + l_new.num_layer = n + n += 1 + l_new.aggregate(mylayers[int(l2)]) + rem_layers.remove(mylayers[int(l1)]) + rem_layers.remove(mylayers[int(l2)]) + rem_layers.append(l_new) + mylayers.append(l_new) + q = self.__compute_q_approx(rem_layers) + q_vals.append(q) + self.q_vals_approx = q_vals + return q_vals + + def compute_partitions(self): + if (self.verb): + sys.stderr.write("Getting partitions...\n") + if self.Z == None: + self.reduce() + if self.q_vals == None: + self.get_q_profile() + sets = {} + M = len(self.layers) + for i in range(len(self.layers)): + sets[i] = [i] + best_pos = self.q_vals.index(max(self.q_vals)) + j = 0 + cur_part = sets.values() + self.cuts = [copy.deepcopy(cur_part)] + while j < M-1: + l1, l2, _x, _y = self.Z[j] + l1 = int(l1) + l2 = int(l2) + val = sets[l1] + val.extend(sets[l2]) + sets[M+j] = val + r1 = cur_part.index(sets[l1]) + cur_part.pop(r1) + r2 = cur_part.index(sets[l2]) + cur_part.pop(r2) + cur_part.append(val) + j += 1 + self.cuts.append(copy.deepcopy(cur_part)) + self.cuts.append(copy.deepcopy(cur_part)) + return zip(self.q_vals, self.cuts) + + + def compute_partitions_approx(self): + if (self.verb): + sys.stderr.write("Getting partitions (approx)...\n") + if self.Z_approx == None: + self.reduce_approx() + if self.q_vals_approx == None: + self.get_q_profile_approx() + sets = {} + M = len(self.layers) + for i in range(len(self.layers)): + sets[i] = [i] + best_pos = self.q_vals_approx.index(max(self.q_vals_approx)) + j = 0 + cur_part = sets.values() + self.cuts_approx = [copy.deepcopy(cur_part)] + while j < M-1: + l1, l2, _x, _y = self.Z_approx[j] + l1 = int(l1) + l2 = int(l2) + val = sets[l1] + val.extend(sets[l2]) + sets[M+j] = val + r1 = cur_part.index(sets[l1]) + cur_part.pop(r1) + r2 = cur_part.index(sets[l2]) + cur_part.pop(r2) + cur_part.append(val) + j += 1 + self.cuts_approx.append(copy.deepcopy(cur_part)) + self.cuts_approx.append(copy.deepcopy(cur_part)) + return zip(self.q_vals_approx, self.cuts_approx) + + def draw_dendrogram(self, force = False): + if not has_matplotlib: + sys.stderr.write("No matplotlib module found in draw_dendrogram...Exiting!!!\n") + sys.exit(3) + if self.Z == None: + if not force: + sys.stderr.write("Please call reduce() first or specify 'force=True'") + else: + self.reduce() + dendrogram(self.Z, no_plot=False) + matplotlib.pyplot.draw() + matplotlib.pyplot.show() + + def draw_dendrogram_approx(self, force = False): + if not has_matplotlib: + sys.stderr.write("No matplotlib module found in draw_dendrogram_approx...Exiting!!!\n") + sys.exit(3) + if self.Z_approx == None: + if not force: + sys.stderr.write("Please call reduce_approx() first or specify 'force=True'") + else: + self.reduce_approx() + dendrogram(self.Z_approx, no_plot=False) + matplotlib.pyplot.draw() + matplotlib.pyplot.show() + + def dump_partitions(self): + part = zip(self.q_vals, self.cuts) + for q, p in part: + print q, "->", p + + def dump_partitions_approx(self): + part = zip(self.q_vals_approx, self.cuts_approx) + for q, p in part: + print q, "->", p + + + + |