Diffstat (limited to 'structure')
33 files changed, 2267 insertions, 0 deletions
diff --git a/structure/activity/degs_to_activity_overlap.py b/structure/activity/degs_to_activity_overlap.py
new file mode 100644
index 0000000..c96959b
--- /dev/null
+++ b/structure/activity/degs_to_activity_overlap.py
@@ -0,0 +1,36 @@
+####
+##
+## Take a file which contains, on the n-th line, the degrees at each
+## layer of the n-th node, and print on output the corresponding
+## overlapping degree (i.e., the total number of edges incident on
+## the node) and node multi-activity (i.e., the number of layers in
+## which the node is active)
+##
+##
+
+
+import sys
+
+def to_binary(l):
+    s = 0
+    e = 0
+    for v in l:
+        s += v * pow(2,e)
+        e += 1
+    return s
+
+if len(sys.argv) < 2:
+    print "Usage: %s <filein>" % sys.argv[0]
+    sys.exit(1)
+
+distr = {}
+
+with open(sys.argv[1]) as f:
+    for l in f:
+        elems = [int(x) for x in l.strip(" \n").split(" ")]
+        ov = sum(elems)
+        new_list = [1 if x>0 else 0 for x in elems]
+        multi_act = sum(new_list)
+        if multi_act and ov:
+            print ov, multi_act
+
diff --git a/structure/activity/degs_to_binary.py b/structure/activity/degs_to_binary.py
new file mode 100644
index 0000000..cb7aef5
--- /dev/null
+++ b/structure/activity/degs_to_binary.py
@@ -0,0 +1,45 @@
+####
+##
+## Take a file which contains, on the n-th line, the degrees at each
+## layer of the n-th node, and print on output the corresponding node
+## participation bit-strings, i.e. the string which contains "1" if on
+## that layer the node is connected, and zero otherwise
+##
+## On stderr, we also dump the distribution of each bit-string
+##
+
+
+import sys
+
+def to_binary(l):
+    s = 0
+    e = 0
+    for v in l:
+        s += v * pow(2,e)
+        e += 1
+    return s
+
+if len(sys.argv) < 2:
+    print "Usage: %s <filein>" % sys.argv[0]
+    sys.exit(1)
+
+distr = {}
+
+with open(sys.argv[1]) as f:
+    for l in f:
+        elems = [int(x) for x in l.strip(" \n").split(" ")]
+        new_list = [1 if x>0 else 0 for x in elems]
+        val = to_binary(new_list)
+        if val in distr:
+            distr[val] += 1
+        else:
+            distr[val] = 1
+        for i in new_list:
+            print i,
+        print
+
+for k in distr:
+    bin_list = bin(k)
+    bin_num = sum([int(x) if x=='1' else 0 for x in bin_list[2:]])
+    sys.stderr.write("%d %028s %d \n" % (bin_num, bin_list[2:], distr[k]))
+
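For reference, a minimal worked sketch of the little-endian encoding implemented by to_binary() in the two scripts above; the degree vector is a toy value, not taken from any dataset in this repository:

    # Toy 3-layer degree vector (one line of a hypothetical degree file).
    degrees = [2, 0, 1]                                    # degrees on layers 0, 1, 2
    bits = [1 if x > 0 else 0 for x in degrees]            # activity bits -> [1, 0, 1]
    print sum(b * pow(2, e) for e, b in enumerate(bits))   # -> 5, i.e. binary 101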
diff --git a/structure/activity/hamming_dist.py b/structure/activity/hamming_dist.py
new file mode 100644
index 0000000..2a98e64
--- /dev/null
+++ b/structure/activity/hamming_dist.py
@@ -0,0 +1,41 @@
+####
+##
+## Take as input the layers of a multiplex, and print, for each pair
+## of layers, the normalised Hamming distance between their sets of
+## active nodes
+##
+##
+
+
+import sys
+
+if len(sys.argv) < 3:
+    print "Usage: %s <layer1> <layer2> [<layer3>...]" % sys.argv[0]
+    sys.exit(1)
+
+max_N = -1
+
+layers = []
+
+
+for layer in sys.argv[1:]:
+    active = []
+    with open(layer, "r") as lines:
+        for l in lines:
+            if l[0] == "#":
+                continue
+
+            s, d = [int(x) for x in l.strip(" \n").split(" ")[:2]]
+            if s > max_N:
+                max_N = s
+            if d > max_N:
+                max_N = d
+            active.extend([s,d])
+    active = set(active)
+    layers.append(active)
+
+for i in range(len(layers)):
+    for j in range(i+1, len(layers)):
+        s = layers[i] ^ layers[j]
+        print i, j, len(s)*1.0 / min(len(layers[i]) + len(layers[j]), max_N+1)
+
diff --git a/structure/activity/layer_activity.py b/structure/activity/layer_activity.py
new file mode 100644
index 0000000..bea0416
--- /dev/null
+++ b/structure/activity/layer_activity.py
@@ -0,0 +1,27 @@
+####
+##
+## Take as input the layers of a multiplex, and provide as output a
+## file where the n-th line contains the activity of the n-th
+## layer.
+##
+##
+
+
+import sys
+
+if len(sys.argv) < 2:
+    print "Usage: %s <layer1> [<layer2>...]" % sys.argv[0]
+    sys.exit(1)
+
+
+for layer in sys.argv[1:]:
+    active = []
+    with open(layer, "r") as lines:
+        for l in lines:
+            if l[0] == "#":
+                continue
+
+            s, d = [int(x) for x in l.strip(" \n").split(" ")[:2]]
+            active.extend([s,d])
+    active = set(active)
+    print len(active)
diff --git a/structure/activity/layer_activity_vectors.py b/structure/activity/layer_activity_vectors.py
new file mode 100644
index 0000000..f32418d
--- /dev/null
+++ b/structure/activity/layer_activity_vectors.py
@@ -0,0 +1,44 @@
+####
+##
+## Take as input the layers of a multiplex, and provide as output a
+## file where the n-th line contains the activity vector of the n-th
+## layer, i.e. a "1" for each active node and a "0" for each
+## inactive one
+##
+
+
+import sys
+
+if len(sys.argv) < 2:
+    print "Usage: %s <layer1> [<layer2>...]" % sys.argv[0]
+    sys.exit(1)
+
+max_N = -1
+
+layers = []
+
+
+for layer in sys.argv[1:]:
+    active = []
+    with open(layer, "r") as lines:
+        for l in lines:
+            if l[0] == "#":
+                continue
+
+            s, d = [int(x) for x in l.strip(" \n").split(" ")[:2]]
+            if s > max_N:
+                max_N = s
+            if d > max_N:
+                max_N = d
+            active.extend([s,d])
+    active = set(active)
+    layers.append(active)
+
+for l in layers:
+    for n in range(max_N+1):
+        if n in l:
+            print 1,
+        else:
+            print 0,
+    print
+
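A toy check of the normalised Hamming distance computed by hamming_dist.py above, with assumed activity sets and max_N; the symmetric difference is divided by the largest size it could attain, so the result always lies in [0, 1]:

    # Nodes active on two hypothetical layers, node IDs up to max_N = 5.
    max_N = 5
    act_i = set([0, 1, 2])
    act_j = set([1, 2, 3, 4])
    s = act_i ^ act_j                  # symmetric difference -> set([0, 3, 4])
    print len(s) * 1.0 / min(len(act_i) + len(act_j), max_N + 1)   # -> 0.5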
diff --git a/structure/activity/multiplexity.py b/structure/activity/multiplexity.py
new file mode 100644
index 0000000..9e038c9
--- /dev/null
+++ b/structure/activity/multiplexity.py
@@ -0,0 +1,41 @@
+####
+##
+## Take as input the layers of a multiplex, and print, for each pair
+## of layers, their multiplexity, i.e. the fraction of nodes which
+## are active on both layers
+##
+##
+
+
+import sys
+
+if len(sys.argv) < 3:
+    print "Usage: %s <layer1> <layer2> [<layer3>...]" % sys.argv[0]
+    sys.exit(1)
+
+max_N = -1
+
+layers = []
+
+
+for layer in sys.argv[1:]:
+    active = []
+    with open(layer, "r") as lines:
+        for l in lines:
+            if l[0] == "#":
+                continue
+
+            s, d = [int(x) for x in l.strip(" \n").split(" ")[:2]]
+            if s > max_N:
+                max_N = s
+            if d > max_N:
+                max_N = d
+            active.extend([s,d])
+    active = set(active)
+    layers.append(active)
+
+for i in range(len(layers)):
+    for j in range(i+1, len(layers)):
+        s = layers[i] & layers[j]
+        print i, j, len(s)*1.0 / (max_N+1)
+
diff --git a/structure/activity/node_activity.py b/structure/activity/node_activity.py
new file mode 100644
index 0000000..6410a68
--- /dev/null
+++ b/structure/activity/node_activity.py
@@ -0,0 +1,51 @@
+####
+##
+## Take as input the layers of a multiplex, and provide as output a
+## file where the n-th line contains the activity of the n-th node. We
+## assume that nodes are numbered sequentially, starting from 0, with
+## no gaps (i.e., missing nodes are treated as isolated nodes)
+##
+##
+##
+
+
+import sys
+
+if len(sys.argv) < 2:
+    print "Usage: %s <layer1> [<layer2>...]" % sys.argv[0]
+    sys.exit(1)
+
+node_activity = {}
+
+max_N = -1
+
+for layer in sys.argv[1:]:
+    active = []
+    with open(layer, "r") as lines:
+        for l in lines:
+            if l[0] == "#":
+                continue
+
+            s, d = [int(x) for x in l.strip(" \n").split(" ")[:2]]
+            active.extend([s,d])
+            if s > max_N:
+                max_N = s
+            if d > max_N:
+                max_N = d
+    active = set(active)
+    for n in active:
+        if n in node_activity:
+            node_activity[n] += 1
+        else:
+            node_activity[n] = 1
+
+
+for n in range(max_N+1):
+    if n in node_activity:
+        print node_activity[n]
+    else:
+        print 0
+
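An end-to-end sketch of what node_activity.py computes, with two toy layers given inline instead of as files (all values assumed):

    # Node activity = number of layers in which a node has at least one edge.
    layers = [[(0, 1), (1, 2)],        # layer 0
              [(0, 2)]]                # layer 1
    node_activity = {}
    for edges in layers:
        active = set()
        for s, d in edges:
            active.update([s, d])
        for n in active:
            node_activity[n] = node_activity.get(n, 0) + 1
    print node_activity                # -> {0: 2, 1: 1, 2: 2}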
diff --git a/structure/activity/node_activity_vectors.py b/structure/activity/node_activity_vectors.py
new file mode 100644
index 0000000..9839334
--- /dev/null
+++ b/structure/activity/node_activity_vectors.py
@@ -0,0 +1,68 @@
+####
+##
+## Take as input the layers of a multiplex, and provide as output a
+## file where the n-th line contains the activity vector of the n-th
+## node, i.e. a "1" for each layer in which the node has at least one
+## edge and a "0" otherwise, in the format:
+##
+## node1_actlay1 node1_actlay2 .... node1_actlayM
+## node2_actlay1 node2_actlay2 .... node2_actlayM
+## ..............................................
+## nodeN_actlay1 nodeN_actlay2 .... nodeN_actlayM
+##
+##
+
+import sys
+
+if len(sys.argv) < 2:
+    print "Usage: %s <layer1> [<layer2>...]" % sys.argv[0]
+    sys.exit(1)
+
+node_degrees = {}
+
+max_N = -1
+
+num_layer = 0
+
+for layer in sys.argv[1:]:
+    with open(layer, "r") as lines:
+        for l in lines:
+            if l[0] == "#":
+                continue
+
+            s, d = [int(x) for x in l.strip(" \n").split(" ")[:2]]
+
+            if s > max_N:
+                max_N = s
+            if d > max_N:
+                max_N = d
+
+            if s in node_degrees:
+                if num_layer in node_degrees[s]:
+                    node_degrees[s][num_layer] += 1
+                else:
+                    node_degrees[s][num_layer] = 1
+            else:
+                node_degrees[s] = {}
+                node_degrees[s][num_layer] = 1
+
+            if d in node_degrees:
+                if num_layer in node_degrees[d]:
+                    node_degrees[d][num_layer] += 1
+                else:
+                    node_degrees[d][num_layer] = 1
+            else:
+                node_degrees[d] = {}
+                node_degrees[d][num_layer] = 1
+    num_layer += 1
+
+
+for n in range(max_N+1):
+    for i in range(num_layer):
+        if n in node_degrees:
+            if i in node_degrees[n]:
+                print 1,
+            else:
+                print 0,
+        else:
+            print 0,
+    print
diff --git a/structure/activity/node_degree_vectors.py b/structure/activity/node_degree_vectors.py
new file mode 100644
index 0000000..8863daa
--- /dev/null
+++ b/structure/activity/node_degree_vectors.py
@@ -0,0 +1,68 @@
+####
+##
+## Take as input the layers of a multiplex, and provide as output a
+## file where the n-th line contains the degrees of the n-th node at
+## each layer, separated by a space, in the format:
+##
+## node1_deglay1 node1_deglay2 .... node1_deglayM
+## node2_deglay1 node2_deglay2 .... node2_deglayM
+## ..............................................
+## nodeN_deglay1 nodeN_deglay2 .... nodeN_deglayM
+##
+##
+
+import sys
+
+if len(sys.argv) < 2:
+    print "Usage: %s <layer1> [<layer2>...]" % sys.argv[0]
+    sys.exit(1)
+
+node_degrees = {}
+
+max_N = -1
+
+num_layer = 0
+
+for layer in sys.argv[1:]:
+    with open(layer, "r") as lines:
+        for l in lines:
+            if l[0] == "#":
+                continue
+
+            s, d = [int(x) for x in l.strip(" \n").split(" ")[:2]]
+
+            if s > max_N:
+                max_N = s
+            if d > max_N:
+                max_N = d
+
+            if s in node_degrees:
+                if num_layer in node_degrees[s]:
+                    node_degrees[s][num_layer] += 1
+                else:
+                    node_degrees[s][num_layer] = 1
+            else:
+                node_degrees[s] = {}
+                node_degrees[s][num_layer] = 1
+
+            if d in node_degrees:
+                if num_layer in node_degrees[d]:
+                    node_degrees[d][num_layer] += 1
+                else:
+                    node_degrees[d][num_layer] = 1
+            else:
+                node_degrees[d] = {}
+                node_degrees[d][num_layer] = 1
+    num_layer += 1
+
+
+for n in range(max_N+1):
+    for i in range(num_layer):
+        if n in node_degrees:
+            if i in node_degrees[n]:
+                print node_degrees[n][i],
+            else:
+                print 0,
+        else:
+            print 0,
+    print
diff --git a/structure/correlations/Makefile b/structure/correlations/Makefile
new file mode 100644
index 0000000..58301cb
--- /dev/null
+++ b/structure/correlations/Makefile
@@ -0,0 +1,15 @@
+CFLAGS=-O3
+CC=gcc
+GSL_FLAGS=-lgsl -lgslcblas -lm
+MFLAG=-lm
+
+all: dump_k_q fit_knn
+
+fit_knn: fit_knn.c fit_utils.c
+	$(CC) $(CFLAGS) -o fit_knn fit_knn.c fit_utils.c $(GSL_FLAGS)
+
+dump_k_q: dump_k_q.c rank_utils.c
+	$(CC) $(CFLAGS) -o dump_k_q dump_k_q.c rank_utils.c $(MFLAG)
+
+clean:
+	rm -f dump_k_q fit_knn
diff --git a/structure/correlations/compute_pearson.py b/structure/correlations/compute_pearson.py
new file mode 100644
index 0000000..e2c9055
--- /dev/null
+++ b/structure/correlations/compute_pearson.py
@@ -0,0 +1,33 @@
+####
+##
+## Compute the Pearson correlation coefficient between the values of
+## node properties included in the two files provided as input.
+## + +import sys +import numpy +import scipy.stats +import math + +if len(sys.argv) < 3: + print "Usage %s <file1> <file2>" % sys.argv[0] + sys.exit(1) + + +x1 = [] + +with open(sys.argv[1], "r") as lines: + for l in lines: + elem = [float(x) if "." in x or "e" in x else int(x) for x in l.strip(" \n").split()][0] + x1.append(elem) + +x2 = [] + +with open(sys.argv[2], "r") as lines: + for l in lines: + elem = [float(x) if "." in x or "e" in x else int(x) for x in l.strip(" \n").split()][0] + x2.append(elem) + + +r2 =numpy.corrcoef(x1,x2) +print r2[0][1] diff --git a/structure/correlations/compute_rho.py b/structure/correlations/compute_rho.py new file mode 100644 index 0000000..8fa4174 --- /dev/null +++ b/structure/correlations/compute_rho.py @@ -0,0 +1,118 @@ +#### +## +## Get two rankings and a pairing, and compute the corresponding +## Spearman's rho rank correlation coefficient +## +## +## +## + +import sys +import random +import math +#import scipy.stats + +## +## Compute the constant C of the Spearman's rho correlation coefficient in the draft +## +def compute_C(rank1, rank2): + N = len(rank1) + [mu1, mu2] = [1.0 * sum(x.values()) / len(x.values()) for x in [rank1, rank2]] + [sum1, sum2] = [1.0 * sum(x.values()) for x in [rank1, rank2]] + C = N * mu1 * mu2 - mu2 * sum1 - mu1 * sum2 + + #print mu1, mu2, sum1, sum2, C + + return C + + +## +## Compute the constant D of the Spearman's rho correlation coefficient in the draft +## +def compute_D(rank1, rank2): + [mu1, mu2] = [1.0 * sum(x.values()) / len(x.values()) for x in [rank1, rank2]] + s1 = sum([math.pow((x-mu1), 2) for x in rank1.values()]) + s2 = sum([math.pow((x-mu2), 2) for x in rank2.values()]) + + D = math.sqrt(s1 * s2) + return D + +def compute_rho(rank1, rank2, pairing, C, D): + + rho = 0 + for s,t in pairing: + rho += rank1[s] * rank2[t] + rho = (rho + C) / D + return rho + +if len(sys.argv) < 3: + print "Usage: %s <rank1> <rank2> [<pairing>]" % sys.argv[0] + sys.exit(1) + +rank1 = {} +rank2 = {} + +lines = open(sys.argv[1], "r").readlines() + +i = 0 +for l in lines: + if l[0] == "#" or l.strip(" \n").split(" ") == []: ## Strip comments and empty lines + continue + r = [float(x) if "." in x or "e" in x else int(x) for x in l.strip(" \n").split(" ")][0] + rank1[i] = r + i += 1 + + +lines = open(sys.argv[2], "r").readlines() + +i = 0 +for l in lines: + if l[0] == "#" or l.strip(" \n").split(" ") == []: ## Strip comments and empty lines + continue + r = [float(x) if "." in x or "e" in x else int(x) for x in l.strip(" \n").split(" ")][0] + rank2[i] = r + i += 1 + + +N1 = len(rank1) +N2 = len(rank2) + +if (N1 != N2): + print "The two files must have the same number of nodes!!!!!" + sys.exit(2) + + + +C = compute_C(rank1, rank2) +D = compute_D(rank1, rank2) + + +## We start from a random pairing, and compute the corresponding value of rho + +pairing = [] + +if len(sys.argv) > 3: + lines = open(sys.argv[3], "r").readlines() + + + + for l in lines: + if l[0] == "#" or l.strip(" \n").split(" ") == []: + continue + p1, p2 = [int(x) for x in l.strip(" \n").split(" ")][:2] + pairing.append((p1, p2)) + +else: + for i in range (N1): + pairing.append((i,i)) + + +if len(pairing) != N1: + print "ERROR !!! The pairing should be of the same length of the ranking files!!!" 
+ sys.exit(1) + +rho = compute_rho(rank1, rank2, pairing, C, D) + +print rho + + diff --git a/structure/correlations/compute_tau.py b/structure/correlations/compute_tau.py new file mode 100644 index 0000000..3d1f804 --- /dev/null +++ b/structure/correlations/compute_tau.py @@ -0,0 +1,133 @@ +#### +## +## Take as input two files, whose n^th line contains the ranking of +## element n, and compute the Kendall's \tau_b rank correlation +## coefficient +## +## + +import sys +from numpy import * + + +def kendalltau(x,y): + initial_sort_with_lexsort = True # if True, ~30% slower (but faster under profiler!) but with better worst case (O(n log(n)) than (quick)sort (O(n^2)) + n = len(x) + temp = range(n) # support structure used by mergesort + # this closure recursively sorts sections of perm[] by comparing + # elements of y[perm[]] using temp[] as support + # returns the number of swaps required by an equivalent bubble sort + def mergesort(offs, length): + exchcnt = 0 + if length == 1: + return 0 + if length == 2: + if y[perm[offs]] <= y[perm[offs+1]]: + return 0 + t = perm[offs] + perm[offs] = perm[offs+1] + perm[offs+1] = t + return 1 + length0 = length / 2 + length1 = length - length0 + middle = offs + length0 + exchcnt += mergesort(offs, length0) + exchcnt += mergesort(middle, length1) + if y[perm[middle - 1]] < y[perm[middle]]: + return exchcnt + # merging + i = j = k = 0 + while j < length0 or k < length1: + if k >= length1 or (j < length0 and y[perm[offs + j]] <= y[perm[middle + k]]): + temp[i] = perm[offs + j] + d = i - j + j += 1 + else: + temp[i] = perm[middle + k] + d = (offs + i) - (middle + k) + k += 1 + if d > 0: + exchcnt += d; + i += 1 + perm[offs:offs+length] = temp[0:length] + return exchcnt + + # initial sort on values of x and, if tied, on values of y + if initial_sort_with_lexsort: + # sort implemented as mergesort, worst case: O(n log(n)) + perm = lexsort((y, x)) + else: + # sort implemented as quicksort, 30% faster but with worst case: O(n^2) + perm = range(n) + perm.sort(lambda a,b: cmp(x[a],x[b]) or cmp(y[a],y[b])) + + # compute joint ties + first = 0 + t = 0 + for i in xrange(1,n): + if x[perm[first]] != x[perm[i]] or y[perm[first]] != y[perm[i]]: + t += ((i - first) * (i - first - 1)) / 2 + first = i + t += ((n - first) * (n - first - 1)) / 2 + + # compute ties in x + first = 0 + u = 0 + for i in xrange(1,n): + if x[perm[first]] != x[perm[i]]: + u += ((i - first) * (i - first - 1)) / 2 + first = i + u += ((n - first) * (n - first - 1)) / 2 + + # count exchanges + exchanges = mergesort(0, n) + # compute ties in y after mergesort with counting + first = 0 + v = 0 + for i in xrange(1,n): + if y[perm[first]] != y[perm[i]]: + v += ((i - first) * (i - first - 1)) / 2 + first = i + v += ((n - first) * (n - first - 1)) / 2 + + tot = (n * (n - 1)) / 2 + if tot == u and tot == v: + return 1 # Special case for all ties in both ranks + + tau = ((tot-(v+u-t)) - 2.0 * exchanges) / (sqrt(float(( tot - u )) * float( tot - v ))) + + # what follows reproduces ending of Gary Strangman's original stats.kendalltau() in SciPy + svar = (4.0*n+10.0) / (9.0*n*(n-1)) + z = tau / sqrt(svar) + ##prob = erfc(abs(z)/1.4142136) + ##return tau, prob + return tau + +def main(): + + if len(sys.argv) < 3: + print "Usage: %s <file1> <file2>" % sys.argv[0] + sys.exit(1) + + x1 = [] + x2= [] + + lines = open(sys.argv[1]).readlines() + + for l in lines: + elem = [float(x) if "e" in x or "." 
in x else int(x) for x in l.strip(" \n").split()][0]
+        x1.append(elem)
+
+    lines = open(sys.argv[2]).readlines()
+
+    for l in lines:
+        elem = [float(x) if "e" in x or "." in x else int(x) for x in l.strip(" \n").split()][0]
+        x2.append(elem)
+
+
+    tau = kendalltau(x1,x2)
+    print tau
+
+
+if __name__ == "__main__":
+    main()
diff --git a/structure/correlations/dump_k_q.c b/structure/correlations/dump_k_q.c
new file mode 100644
index 0000000..2b6ba79
--- /dev/null
+++ b/structure/correlations/dump_k_q.c
@@ -0,0 +1,50 @@
+/**
+ *
+ * Get the degree distributions of two layers and a pairing, and dump
+ * on output the pairs k-q
+ *
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "rank_utils.h"
+
+void dump_k_q(double *R1, double *R2, int N, int *pairing){
+
+  int i;
+
+  for (i=0; i<N; i++){
+    printf("%g %g\n", R1[i], R2[pairing[i]]);
+  }
+}
+
+
+int main(int argc, char *argv[]){
+
+  int N1, N2;
+  double *R1 = NULL;
+  double *R2 = NULL;
+
+  int *pairing = NULL;
+
+
+  if (argc < 4){
+    printf("Usage: %s <degs1> <degs2> <pairing>\n", argv[0]);
+    exit(1);
+  }
+
+  load_ranking(argv[1], &N1, &R1);
+  load_ranking(argv[2], &N2, &R2);
+
+  if (N1 != N2){
+    printf("Error!!!! The two files must have the same number of lines!!!! Exiting...\n");
+    exit(1);
+  }
+
+  pairing = malloc(N1 * sizeof(int));
+
+  load_pairing(&pairing, N1, argv[3]);
+
+  dump_k_q(R1, R2, N1, pairing);
+
+}
diff --git a/structure/correlations/fit_knn.c b/structure/correlations/fit_knn.c
new file mode 100644
index 0000000..64fc4c3
--- /dev/null
+++ b/structure/correlations/fit_knn.c
@@ -0,0 +1,59 @@
+/**
+ *
+ * Take a file which contains pairs in the form:
+ *
+ * k q
+ *
+ * compute qnn(k) and fit this function with a power-law
+ *
+ * a*x^{b}
+ *
+ * The fit is actually performed as a linear fit on the
+ * exponentially-binned log-log distribution
+ *
+ *
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+#include <gsl/gsl_fit.h>
+#include "fit_utils.h"
+
+
+
+int main(int argc, char *argv[]){
+
+  double *k, *q, *x, *y;
+  int N, num;
+  double alpha;
+
+  double c0, c1, cov00, cov01, cov11, sqsum;
+
+  k=q=x=y=NULL;
+
+  if (argc < 3){
+    printf("%s <filein> <alpha>\n", argv[0]);
+    exit(1);
+  }
+
+  alpha=atof(argv[2]);
+
+  fprintf(stderr,"Alpha: %g\n", alpha);
+
+  load_sequence_col(argv[1], &k, &N, 0);
+  load_sequence_col(argv[1], &q, &N, 1);
+  exp_bin_avg(k, q, N, alpha, &x, &y, &num);
+  compact_distr(x, y, &num);
+  map_vec(x, num, log);
+  map_vec(y, num, log);
+
+  gsl_fit_linear(x, 1, y, 1, num, &c0, &c1, &cov00, &cov01, &cov11, &sqsum);
+  fprintf(stderr, "a: %g b: %g\n", exp(c0), c1);
+}
diff --git a/structure/correlations/fit_utils.c b/structure/correlations/fit_utils.c
new file mode 100644
index 0000000..e9e3954
--- /dev/null
+++ b/structure/correlations/fit_utils.c
@@ -0,0 +1,382 @@
+/**
+ *
+ * Fit a given sequence with a power-law function
+ *
+ * a*x^{b}
+ *
+ * The fit is actually performed as a linear fit on the
+ * exponentially-binned log-log distribution
+ *
+ *
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#include <gsl/gsl_fit.h>
+
+/**
+ *
+ * Load a sequence from a file, which contains one element on each line
+ *
+ */
+
+void load_sequence(char *fname, double **v, int *N){
+
+  int size;
+  char buff[256];
+
+  FILE *f;
+
+  f = fopen(fname, "r");
+  if (!f){
+    fprintf(stderr, "Error opening file %s!!!!
Exiting...\n", fname); + exit(1); + } + + *N =0; + size = 10; + if (*v == NULL) + *v = malloc(size * sizeof(double)); + else{ + *v = realloc(*v, size * sizeof(double)); + } + + while (fgets(buff, 255, f)){ + (*v)[*N] = atof(buff); + *N += 1; + if (*N == size){ + size += 10; + *v = realloc(*v, size * sizeof(double)); + } + } + *v = realloc(*v, (*N) * sizeof(double)); + fclose(f); +} + + +/** + * + * Load a sequence, getting the "col"-th column of the input file + * + */ + +void load_sequence_col(char *fname, double **v, int *N, int col){ + + int size, n; + char buff[256]; + char *ptr; + + FILE *f; + + f = fopen(fname, "r"); + if (!f){ + fprintf(stderr, "Error opening file %s!!!! Exiting...\n", fname); + exit(1); + } + + *N =0; + size = 10; + if (*v == NULL) + *v = malloc(size * sizeof(double)); + else{ + *v = realloc(*v, size * sizeof(double)); + } + + while (fgets(buff, 255, f)){ + ptr = strtok(buff, " "); + if (col > 0){ + n = 0; + while (n<col){ + ptr = strtok(NULL, " "); + n += 1; + } + } + (*v)[*N] = atof(ptr); + *N += 1; + + if (*N == size){ + size += 10; + *v = realloc(*v, size * sizeof(double)); + } + } + *v = realloc(*v, (*N) * sizeof(double)); + fclose(f); +} + + +/** + * + * Make the exponential binning of a distribution, with a giving + * exponent alpha. The value of y[i] is the number of elements of v + * whose value lies between x[i-1] and x[i]... + * + */ + +void exp_bin_cnt(double *v, int N, double alpha, double **x, double **y, int *num){ + + double min_v, max_v, val, last_val; + int i, j, size, num_x; + double last_size, new_size; + + min_v = max_v = v[0]; + + for (i=1; i<N; i ++){ + if (v[i] > max_v) + max_v = v[i]; + else if (v[i] > 0 && v[i] < min_v) + min_v = v[i]; + } + + size = 10; + if (*x == NULL){ + *x = malloc(size * sizeof(double)); + } + else{ + *x = realloc(*x, size * sizeof(double)); + } + + val = min_v; + last_size = min_v; + (*x)[0] = min_v; + num_x = 1; + + while(val < max_v){ + new_size = last_size * alpha; + val = last_size + new_size; + last_size = new_size; + last_val = val; + (*x)[num_x] = val; + num_x += 1; + if (num_x == size){ + size += 10; + *x = realloc(*x, size * sizeof(double)); + } + } + + if (*y == NULL){ + *y = malloc(num_x * sizeof(double)); + } + else{ + *y = realloc(*y, num_x * sizeof(double)); + } + for (i=0; i < num_x; i++){ + (*y)[i] = 0; + } + + + + for(i=0; i <N; i ++){ + j = 0; + while(v[i] > (*x)[j]){ + j ++; + } + (*y)[j] += 1; + } + *num = num_x; +} + +/** + * + * Make the exponential binning of a distribution, with a giving + * exponent alpha. The value of y[i] is the average of the values in + * the vector "w" whose corresponding v lies between x[i-1] and x[i]... 
+ * + */ + + +void exp_bin_avg(double *v, double *w, int N, double alpha, double **x, double **y, int *num){ + + double min_v, max_v, val, last_val; + int i, j, size, num_x; + double last_size, new_size; + int *cnt; + + + min_v = max_v = v[0]; + + for (i=1; i<N; i ++){ + if (v[i] > max_v) + max_v = v[i]; + else if (v[i] > 0 && v[i] < min_v) + min_v = v[i]; + } + + size = 10; + if (*x == NULL){ + *x = malloc(size * sizeof(double)); + } + else{ + *x = realloc(*x, size * sizeof(double)); + } + + val = min_v; + last_size = min_v; + (*x)[0] = min_v; + num_x = 1; + + while(val < max_v){ + new_size = last_size * alpha; + val = last_size + new_size; + last_size = new_size; + last_val = val; + (*x)[num_x] = val; + num_x += 1; + if (num_x == size){ + size += 10; + *x = realloc(*x, size * sizeof(double)); + } + } + + + cnt = malloc(num_x * sizeof(int)); + + if (*y == NULL){ + *y = malloc(num_x * sizeof(double)); + } + else{ + *y = realloc(*y, num_x * sizeof(double)); + } + for (i=0; i < num_x; i++){ + (*y)[i] = 0; + cnt[i] = 0; + } + + for(i=0; i <N; i ++){ + j = 0; + while(j < num_x && v[i] > (*x)[j]){ + j ++; + } + if(j == num_x){ + printf("Error!!!!! Trying to assing a non-existing bin!!! -- fit_utils.c:exp_bin_avg!!!\n"); + exit(37); + } + (*y)[j] += w[i]; + cnt[j] += 1; + } + *num = num_x; + + for(i=0; i< num_x; i++){ + if (cnt[i] > 0){ + (*y)[i] = (*y)[i] / cnt[i]; + } + } + free(cnt); +} + + +/** + * + * Print a distribution on stdout + * + */ + +void dump_distr(double *x, double *y, int N){ + int i; + + for(i=0; i<N; i ++){ + printf("%g %g\n", x[i], y[i]); + } + +} + + +/** + * Compact a distribution, leaving only the pairs (x_i, y_i) for which + * y_i > 0 + * + */ + +void compact_distr(double *x, double *y, int *num){ + + int i, j; + + i = j = 0; + while(j < *num){ + while(j < *num && y[j] == 0){ + j ++; + } + if (j==*num){ + break; + } + x[i] = x[j]; + y[i] = y[j]; + i ++; + j ++; + } + *num = i; +} + + +/** + * + * Apply the function "f" on all the elemnts of a vector, in-place + * + */ + +void map_vec(double *v, int N, double (*f)(double)){ + int i; + + for (i=0; i<N; i ++){ + v[i] = f(v[i]); + } +} + + +/** + * + * Normalize a distribution, dividing each y[i] for the width of the + * corresponding bin (i.e., x[i] - x[i-1]) + * + */ +void normalize_distr(double *x, double *y, int num){ + + int i; + + for(i=1; i<num; i ++){ + y[i] /= (x[i] - x[i-1]); + } +} + +/** + * + * take two vectors (k and q) and a pairing, and compute the best + * power-law fit "a*k^{mu}" of qnn(k) + * + */ + +void fit_current_trend(double *k, double *q, int N, int *pairing, double *mu, double *a, + double *corr){ + + static int *num; + static double *q_pair, *x, *y; + + int i; + int fit_num; + double c0, c1, cov00, cov01, cov11, sqsum; + + if (q_pair == NULL){ + q_pair = malloc(N * sizeof(double)); + } + + for(i=0; i<N; i++){ + q_pair[i] = q[pairing[i]]; + } + + exp_bin_avg(k, q_pair, N, 1.3, &x, &y, &fit_num); + //normalize_distr(x,y, fit_num); + compact_distr(x, y, &fit_num); + + map_vec(x, fit_num, log); + map_vec(y, fit_num, log); + gsl_fit_linear(x, 1, y, 1, fit_num, &c0, &c1, &cov00, &cov01, &cov11, &sqsum); + + //fprintf(stderr,"cov00: %g cov01: %g cov11: %g\n", cov00, cov01, cov11); + //fprintf(stderr, "corr: %g ", cov01 / sqrt(cov00 * cov11)); + *a = c0; + *mu = c1; + *corr = cov01/sqrt(cov00 * cov11); +} + diff --git a/structure/correlations/fit_utils.h b/structure/correlations/fit_utils.h new file mode 100644 index 0000000..183f47c --- /dev/null +++ b/structure/correlations/fit_utils.h @@ -0,0 +1,25 @@ 
+#ifndef __FIT_UTILS_H__ +#define __FIT_UTILS_H__ + + +void load_sequence(char *fname, double **v, int *N); + +void load_sequence_col(char *fname, double **v, int *N, int col); + +void exp_bin_cnt(double *v, int N, double alpha, double **x, double **y, int *num); + +void exp_bin_avg(double *v, double *w, int N, double alpha, double **x, double **y, int *num); + +void dump_distr(double *x, double *y, int N); + +void compact_distr(double *x, double *y, int *num); + +void map_vec(double *v, int N, double (*f)(double)); + +void normalize_distr(double *x, double *y, int num); + +void fit_current_trend(double *k, double *q, int N, int *pairing, + double *mu, double *a, double *corr); + + +#endif /* __FIT_UTILS_H__ */ diff --git a/structure/correlations/knn_q_from_degrees.py b/structure/correlations/knn_q_from_degrees.py new file mode 100644 index 0000000..5e10bfa --- /dev/null +++ b/structure/correlations/knn_q_from_degrees.py @@ -0,0 +1,49 @@ +#### +## +## +## Take a file with the degree sequence of nodes which are active on +## both layers, and compute \overline{k}(q) and \overline{q}(k), +## i.e. the two inter-layer correlation functions, where we call "k" +## the degree on the first column, and "q" the degree on the second +## column +## +## + +import sys + +if len(sys.argv) < 2: + print "Usage: %s <filein>" % sys.argv[0] + sys.exit(1) + +qnn_k = {} +knn_q = {} + +with open(sys.argv[1]) as f: + for l in f: + k, q = [int(x) for x in l.strip(" \n").split(" ")] + if qnn_k.has_key(k): + qnn_k[k].append(q) + else: + qnn_k[k] = [q] + if knn_q.has_key(q): + knn_q[q].append(k) + else: + knn_q[q] = [k] + + + + +keys= qnn_k.keys() +keys.sort() +for k in keys: + print k, 1.0 * sum(qnn_k[k])/len(qnn_k[k]) + + + + +keys= knn_q.keys() +keys.sort() +for q in keys: + sys.stderr.write("%d %f\n" % (q, 1.0 * sum(knn_q[q])/len(knn_q[q]))) + + diff --git a/structure/correlations/knn_q_from_layers.py b/structure/correlations/knn_q_from_layers.py new file mode 100644 index 0000000..c108c18 --- /dev/null +++ b/structure/correlations/knn_q_from_layers.py @@ -0,0 +1,98 @@ +#### +## +## +## Compute the degree-degree correlations of a multiplex graph, namely: +## +## <k_1>(k_2) and <k_2>(k_1) +## +## Takes as input the two lists of edges corresponding to each layer +## + +import sys +import numpy as np +import networkx as net + + +def knn(G, n): + neigh = G.neighbors(n) + l = G.degree(neigh).values() + return 1.0 * sum(l) / len(l) + + +if len(sys.argv) < 2: + print "Usage: %s <layer1> <layer2>" % sys.argv[0] + sys.exit(1) + + +G1 = net.read_edgelist(sys.argv[1]) +G2 = net.read_edgelist(sys.argv[2]) + +k1_k1 = {} ## Intraleyer knn (k1) +k2_k2 = {} ## Intralayer knn (k2) +k1_k2 = {} ## Interlayer average degree at layer 1 of a node having degree k_2 in layer 2 +k2_k1 = {} ## Interlayer average degree at layer 2 of a node having degree k_1 in layer 1 + + +for n in G1.nodes(): + k1 = G1.degree(n) + + + ##print k1,k2 + + knn1 = knn(G1, n) + if n in G2.nodes(): + k2 = G2.degree(n) + knn2 = knn(G2, n) + else: + k2 = 0 + knn2 = 0 + + if k1_k1.has_key(k1): + k1_k1[k1].append(knn1) + else: + k1_k1[k1] = [knn1] + + if k2_k2.has_key(k2): + k2_k2[k2].append(knn2) + else: + k2_k2[k2] = [knn2] + + + if k1_k2.has_key(k2): + k1_k2[k2].append(k1) + else: + k1_k2[k2] = [k1] + + if k2_k1.has_key(k1): + k2_k1[k1].append(k2) + else: + k2_k1[k1] = [k2] + + +k1_keys = k1_k1.keys() +k1_keys.sort() +k2_keys = k2_k2.keys() +k2_keys.sort() + + +f = open("%s_%s_k1" % (sys.argv[1], sys.argv[2]), "w+") + +for n in k1_keys: + avg_knn = 
np.mean(k1_k1[n]) + std_knn = np.std(k1_k1[n]) + avg_k2 = np.mean(k2_k1[n]) + std_k2 = np.std(k2_k1[n]) + f.write("%d %f %f %f %f\n" % (n, avg_knn, std_knn, avg_k2, std_k2)) + +f.close() + +f = open("%s_%s_k2" % (sys.argv[1], sys.argv[2]), "w+") + +for n in k2_keys: + avg_knn = np.mean(k2_k2[n]) + std_knn = np.std(k2_k2[n]) + avg_k1 = np.mean(k1_k2[n]) + std_k1 = np.std(k1_k2[n]) + f.write("%d %f %f %f %f\n" % (n, avg_knn, std_knn, avg_k1, std_k1)) +f.close() + diff --git a/structure/correlations/rank_nodes.py b/structure/correlations/rank_nodes.py new file mode 100644 index 0000000..6985e88 --- /dev/null +++ b/structure/correlations/rank_nodes.py @@ -0,0 +1,74 @@ +#### +## +## +## Get a file as input, whose n-th line corresponds to the value of a +## certain property of node n, and rank nodes according to their +## properties, taking into account ranking ties properly. +## +## The output is a file whose n-th line is the "ranking" of the n-th +## node according to the given property. (notice that rankings could +## be fractional, due to the tie removal algorithm) +## +## + + +import sys +import math + + +if len(sys.argv) < 2: + print "Usage: %s <filein>" % sys.argv[0] + sys.exit(1) + + + +lines = open(sys.argv[1], "r").readlines() + +ranking = [] + +n=0 +for l in lines: + if l[0] == "#" or l.strip(" \n").split(" ") == []: + continue + v = [float(x) if "." in x or "e" in x else int(x) for x in l.strip(" \n").split(" ")][0] + ranking.append((v,n)) + n +=1 + +ranking.sort(reverse=True) +#print ranking +new_ranking = {} + +v0, n0 = ranking[0] + +old_value = v0 +tot_rankings = 1 + +stack = [n0] +l=1.0 +for v,n in ranking[1:]: + l += 1 + ##print stack, tot_rankings + if v != old_value: ### There is a new rank + # We first compute the rank for all the nodes in the stack and then we set it + new_rank_value = 1.0 * tot_rankings / len(stack) + ##print new_rank_value + for j in stack: + new_ranking[j] = new_rank_value + old_value = v + tot_rankings = l + stack = [n] + else: # One more value with the same rank, keep it for the future + stack.append(n) + tot_rankings += l + +new_rank_value = 1.0 * tot_rankings / len(stack) +#print new_rank_value +for j in stack: + new_ranking[j] = new_rank_value + +#print new_ranking + +keys = new_ranking.keys() +keys.sort() +for k in keys: + print new_ranking[k] diff --git a/structure/correlations/rank_nodes_thresh.py b/structure/correlations/rank_nodes_thresh.py new file mode 100644 index 0000000..44b565a --- /dev/null +++ b/structure/correlations/rank_nodes_thresh.py @@ -0,0 +1,87 @@ +#### +## +## +## Get a file as input, whose n-th line corresponds to the value of a +## certain property of node n, and rank nodes according to their +## properties, taking into account ranking ties properly. +## +## The output is a file whose n-th line is the "ranking" of the n-th +## node according to the given property. (notice that rankings could +## be fractional, due to the tie removal algorithm) +## +## The rank of a node is set to "0" (ZERO) if the corresponding +## property is smaller than a value given as second parameter +## + + +import sys +import math + + +if len(sys.argv) < 3: + print "Usage: %s <filein> <thresh>" % sys.argv[0] + sys.exit(1) + + +thresh = float(sys.argv[2]) + +lines = open(sys.argv[1], "r").readlines() + +ranking = [] + +n=0 +for l in lines: + if l[0] == "#" or l.strip(" \n").split(" ") == []: + continue + v = [float(x) if "." 
in x or "e" in x else int(x) for x in l.strip(" \n").split(" ")][0] + if v >= thresh: + ranking.append((v,n)) + else: + ranking.append((0,n)) + n +=1 + +ranking.sort(reverse=True) +#print ranking +new_ranking = {} + +v0, n0 = ranking[0] + + +old_value = v0 +tot_rankings = 1 + +stack = [n0] +l=1.0 +for v,n in ranking[1:]: + l += 1 + ##print stack, tot_rankings + if v != old_value: ### There is a new rank + # We first compute the rank for all the nodes in the stack and then we set it + if old_value == 0: + new_rank_value = 0 + else: + new_rank_value = 1.0 * tot_rankings / len(stack) + ##print new_rank_value + for j in stack: + new_ranking[j] = new_rank_value + old_value = v + tot_rankings = l + stack = [n] + else: # One more value with the same rank, keep it for the future + stack.append(n) + tot_rankings += l + +if v == 0 : + new_rank_value = 0 +else: + new_rank_value = 1.0 * tot_rankings / len(stack) +#print new_rank_value +for j in stack: + new_ranking[j] = new_rank_value + +#print new_ranking + +keys = new_ranking.keys() +keys.sort() +for k in keys: + print new_ranking[k] diff --git a/structure/correlations/rank_occurrence.py b/structure/correlations/rank_occurrence.py new file mode 100644 index 0000000..6339d5d --- /dev/null +++ b/structure/correlations/rank_occurrence.py @@ -0,0 +1,45 @@ +#### +## +## Get two rankings and compute the size of the k-intersection, +## i.e. the number of elements which are present in the first k +## positions of both rankings, as a function of k +## +## + + +import sys + + +if len(sys.argv)< 4: + print "Usage: %s <file1> <file2> <increment>" % sys.argv[0] + sys.exit(1) + +incr = int(sys.argv[3]) + +rank1 = [] +rank2 = [] + +lines = open(sys.argv[1], "r").readlines() + +for l in lines: + n= l.strip(" \n").split(" ")[0] + rank1.append(n) + +lines = open(sys.argv[2], "r").readlines() + +for l in lines: + n= l.strip(" \n").split(" ")[0] + rank2.append(n) + + +N = len(rank1) + +i = incr + +while i < N+incr: + l = len(set(rank1[:i]) & set(rank2[:i])) + print i, l + i += incr + + + diff --git a/structure/correlations/rank_utils.c b/structure/correlations/rank_utils.c new file mode 100644 index 0000000..4b8ef41 --- /dev/null +++ b/structure/correlations/rank_utils.c @@ -0,0 +1,217 @@ +/** + * + * Some utility functions to be used with tune_rho, compute_rho and + * the like + * + */ + + +#include <stdio.h> +#include <stdlib.h> +#include <math.h> +#include <string.h> +#include <time.h> + +#include "rank_utils.h" + +void load_ranking(char *fname, int *N, double **R){ + + char buff[256]; + int size = 10; + FILE *f; + + if (*R == NULL){ + *R = malloc(size * sizeof (double)); + } + f = fopen(fname, "r"); + if (!f){ + printf("Unable to open file: %s!!! 
Exiting\n", fname);
+    exit(1);
+  }
+
+  *N = 0;
+
+  while (fgets(buff, 255, f)){
+    if (*N == size){
+      size += 10;
+      *R = realloc(*R, size * sizeof(double));
+    }
+    (*R)[*N] = atof(buff);
+    *N += 1;
+  }
+  fclose(f);
+}
+
+double avg_array(double *v, int N){
+
+  double sum = 0.0;
+  int i;
+
+  for (i = 0; i < N; i ++){
+    sum += v[i];
+  }
+  return sum/N;
+}
+
+double compute_C(double *R1, double *R2, int N){
+  double mu1, mu2, sum1, sum2;
+
+  mu1 = avg_array(R1, N);
+  mu2 = avg_array(R2, N);
+
+  sum1 = mu1 * N;
+  sum2 = mu2 * N;
+
+  return N * mu1 * mu2 - mu2 * sum1 - mu1 * sum2;
+}
+
+
+double compute_D(double *R1, double *R2, int N){
+
+  double mu1, mu2, s1, s2;
+  int i;
+
+  mu1 = avg_array(R1, N);
+  mu2 = avg_array(R2, N);
+
+  s1 = s2 = 0.0;
+
+  for (i=0 ; i < N; i ++){
+    s1 += pow((R1[i] - mu1), 2);
+    s2 += pow((R2[i] - mu2), 2);
+  }
+
+  return sqrt(s1 * s2);
+}
+
+
+double compute_rho(double *R1, double *R2, int N, int *pairing){
+
+  double rho = 0;
+  int i;
+
+  for (i=0; i < N; i ++){
+    rho += R1[i] * R2[ pairing[i] ];
+  }
+
+  rho = (rho + compute_C(R1, R2, N))/ compute_D(R1, R2, N);
+  return rho;
+}
+
+void dump_ranking(double *R, int N){
+  int i;
+
+  for (i=0; i < N; i ++){
+    printf("%d: %f\n", i, R[i] );
+  }
+}
+
+
+void init_pairing_natural(int *pairing, int N){
+  int i;
+
+  for (i = 0; i< N; i ++){
+    pairing[i] = i;
+  }
+}
+
+void init_pairing_inverse(int *pairing, int N){
+  int i;
+
+  for (i = 0; i< N; i ++){
+    pairing[i] = N-i-1;
+  }
+}
+
+void select_pairing(int *pairing, int N, int argc, char *argv[], int pos){
+
+  if (argc < pos + 1 || !strncasecmp("rnd", argv[pos], 3)){
+    init_pairing_random(pairing, N);
+  }
+  else if (!strncasecmp("nat", argv[pos], 3)){
+    init_pairing_natural(pairing, N);
+  }
+  else if (!strncasecmp("inv", argv[pos], 3)){
+    init_pairing_inverse(pairing, N);
+  }
+  else{
+    printf ("Pairing strategy \"%s\" unknown!!! Exiting...\n", argv[pos]);
+    exit(1);
+  }
+
+}
+
+
+void shuffle_sequence(int *s, int N){
+
+  int i, j, tmp;
+
+  for (i=N-1; i>=0; i--){
+    j = rand() % (i+1);
+    tmp = s[j];
+    s[j] = s[i];
+    s[i] = tmp;
+  }
+}
+
+
+
+void init_pairing_random(int *pairing, int N){
+
+  init_pairing_natural(pairing, N);
+  shuffle_sequence(pairing, N);
+
+}
+/* Loads a pairing from a file, in the format:
+ *
+ * rank1 rank2
+ * ...........
+ */
+void load_pairing(int **pairing, int N, char *fname){
+
+  FILE *f;
+  int i, j, num;
+  char buff[256];
+  char *ptr;
+
+  f = fopen(fname, "r");
+  if (!f){
+    printf("Error opening file \"%s\"!!!! Exiting....\n", fname);
+    exit(2);
+  }
+
+  if (*pairing == NULL){
+    *pairing = malloc(N * sizeof(int));
+    init_pairing_natural(*pairing, N);
+  }
+
+  num = 0;
+  while(num < N){
+    fgets(buff, 255, f);
+    if (buff[0] == '#')
+      continue;
+    ptr = strtok(buff, " "); /* read the first node */
+    i = atoi(ptr);
+    ptr = strtok(NULL, " "); /* read the second node */
+    j = atoi(ptr);
+    (*pairing)[i] = j;
+    num += 1;
+  }
+}
+
+
+void dump_pairing(int *pairing, int N){
+  int i;
+
+  for(i=0; i< N; i ++){
+    printf("%d %d\n", i, pairing[i]);
+  }
+}
+
+void copy_pairing(int *p1, int *p2, int N){
+  int i;
+
+  for (i=0 ; i < N; i ++){
+    p2[i] = p1[i];
+  }
+}
diff --git a/structure/correlations/rank_utils.h b/structure/correlations/rank_utils.h
new file mode 100644
index 0000000..521582a
--- /dev/null
+++ b/structure/correlations/rank_utils.h
@@ -0,0 +1,44 @@
+#ifndef __RANK_UTILS_H__
+#define __RANK_UTILS_H__
+
+/* Load a ranking */
+void load_ranking(char *fname, int *N, double **R);
+
+/* Compute the average of an array */
+double avg_array(double *v, int N);
+
+/* Compute the term "C" in the rho correlation coefficient */
+double compute_C(double *R1, double *R2, int N);
+
+/* Compute the term "D" in the rho correlation coefficient */
+double compute_D(double *R1, double *R2, int N);
+
+/* Compute the Spearman's rank correlation coefficient \rho */
+double compute_rho(double *R1, double *R2, int N, int *pairing);
+
+void dump_ranking(double *R, int N);
+
+void init_pairing_natural(int *pairing, int N);
+
+void init_pairing_inverse(int *pairing, int N);
+
+void init_pairing_random(int *pairing, int N);
+
+void select_pairing(int *pairing, int N, int argc, char *argv[], int pos);
+
+void shuffle_sequence(int *s, int N);
+
+void dump_pairing(int *pairing, int N);
+
+void copy_pairing(int *pairing1, int *pairing2, int N);
+
+
+#endif /* __RANK_UTILS_H__ */
diff --git a/structure/metrics/aggregate_layers_w.py b/structure/metrics/aggregate_layers_w.py
new file mode 100644
index 0000000..cd5ea1d
--- /dev/null
+++ b/structure/metrics/aggregate_layers_w.py
@@ -0,0 +1,37 @@
+####
+##
+## Aggregate the layers of a multiplex, weighting each edge according
+## to the number of times it occurs in the different layers
+##
+
+import sys
+import networkx as net
+
+
+if len(sys.argv) < 3:
+    print "Usage: %s <file1> <file2> [<file3>....]" % sys.argv[0]
+    sys.exit(1)
+
+G = net.Graph()
+
+lines = open(sys.argv[1], "r").readlines()
+
+for l in lines:
+    s,d = [int(x) for x in l.strip(" \n").split(" ")[:2]]
+    G.add_edge(s,d)
+    G[s][d]['weight'] = 1
+
+for f in sys.argv[2:]:
+    lines = open(f, "r").readlines()
+    for l in lines:
+        s,d = [int(x) for x in l.strip(" \n").split(" ")[:2]]
+        if (G.has_edge(s,d)):
+            G[s][d]['weight'] += 1
+        else:
+            G.add_edge(s,d)
+            G[s][d]['weight'] = 1
+
+for s,d in G.edges():
+    print s,d, G[s][d]['weight']
+
diff --git a/structure/metrics/avg_edge_overlap.py b/structure/metrics/avg_edge_overlap.py
new file mode 100644
index 0000000..e68fd8b
--- /dev/null
+++ b/structure/metrics/avg_edge_overlap.py
@@ -0,0 +1,47 @@
+####
+##
+## Compute the average edge overlap of a multiplex, i.e.
the average +## number of layers in which an edge is present +## +## + +import sys + + +if len(sys.argv) < 2: + print "Usage: %s <layer1> [<layer2>...]" % sys.argv[0] + sys.exit(1) + +max_N = -1 + +all_edges = {} + +layer_ID = -1 + +for layer in sys.argv[1:]: + layer_ID += 1 + with open(layer, "r") as lines: + for l in lines: + if l[0] == "#": + continue + s, d = [int(x) for x in l.strip(" \n").split(" ")[:2]] + if s > d: + tmp = s + s = d + d = tmp + if (s,d) in all_edges: + all_edges[(s,d)].append(layer_ID) + else: + all_edges[(s,d)] = [layer_ID] + +K = len(all_edges.keys()) +M = len(sys.argv) - 1 + +numer = 0 + +for k in all_edges.keys(): + numer += len(set(all_edges[(k)])) + + +print 1.0 * numer / K, 1.0 * numer / (K * M) + diff --git a/structure/metrics/cartography_from_columns.py b/structure/metrics/cartography_from_columns.py new file mode 100644 index 0000000..a2343ed --- /dev/null +++ b/structure/metrics/cartography_from_columns.py @@ -0,0 +1,44 @@ +#### +## +## Make a cartography (i.e., sum and participation coefficient) based +## on the values found at the given column numbers of the file given +## as input, e.g.: +## +## cartography_cols.py FILEIN 1 5 8 14 +## +## makes the assumption that the system has 4 layers, and that the +## quantities involved in the cartography are at the 2nd, 6th, 9th and +## 15th column of FILEIN +## +## +## + +import sys + +if len(sys.argv) < 4: + print "Usage: %s <filein> <col1> <col2> [<col3> <col4>...]" % sys.argv[0] + sys.exit(1) + +filein=sys.argv[1] +cols = [int(x) for x in sys.argv[2:]] + +M = len(cols) + +with open(filein,"r") as f: + for l in f: + if l[0] == "#": + continue + elems = [float(x) if "e" in x or "." in x else int(x) for x in l.strip(" \n").split(" ")] + sum_elems = 0 + part = 0 + for c in cols: + val = elems[c] + sum_elems += val + part += val*val + if sum_elems > 0: + part = M * 1.0 / (M -1) * (1 - part * 1.0 / (sum_elems * sum_elems)) + else: + part = 0.0 + print elems[0], sum_elems, part + + diff --git a/structure/metrics/cartography_from_deg_vectors.py b/structure/metrics/cartography_from_deg_vectors.py new file mode 100644 index 0000000..c40d701 --- /dev/null +++ b/structure/metrics/cartography_from_deg_vectors.py @@ -0,0 +1,37 @@ +#### +## +## Take as input a file containing, on each line, the degree vector of +## a node of the multiplex, and compute the multiplex cartography +## diagram +## +## + +import sys + +if len(sys.argv) < 2: + print "Usage: %s <node_deg_vectors>" % sys.argv[0] + sys.exit(1) + +filein=sys.argv[1] + +M = -1 + +with open(filein,"r") as lines: + for l in lines: + if l[0] == "#": + continue + elems = [int(x) for x in l.strip(" \n").split(" ")] + if (M == -1): + M = len(elems) + sum_elems = 0 + part = 0 + for val in elems: + sum_elems += val + part += val*val + if sum_elems > 0: + part = M * 1.0 / (M -1) * (1 - part * 1.0 / (sum_elems * sum_elems)) + else: + part = 0.0 + print sum_elems, part + + diff --git a/structure/metrics/cartography_from_layers.py b/structure/metrics/cartography_from_layers.py new file mode 100644 index 0000000..b9c4584 --- /dev/null +++ b/structure/metrics/cartography_from_layers.py @@ -0,0 +1,54 @@ +#### +## +## Compute the participation coefficient of each node of a multiplex +## +## + +import sys +import networkx as net +import collections +from compiler.ast import flatten + + + +if len(sys.argv) < 3: + print "Usage: %s <layer1> <layer2> [<layer3>...]" % sys.argv[0] + sys.exit(1) + + +layers = [] + +for f in sys.argv[1:]: + G = net.Graph(net.read_edgelist(f, nodetype=int)) + 
layers.append(G) + +nodes = flatten([x for j in layers for x in j.nodes()]) +#nodes.sort() +nodes = set(nodes) + +M = len(layers) + +#print nodes + +for n in nodes: + deg_alpha_square = 0 + deg = 0 + col = 0 + print n, + for l in layers: + val = l.degree([n]) + if not val: + col = 2* col + continue + col *= 2 + col += 1 + val = val[n] + deg += val + deg_alpha_square += val*val + if deg > 0: + print deg, 1.0 * M / (M-1) * (1.0 - 1.0 * deg_alpha_square/(deg * deg)) , col + else: + print 0 , 0, col + + + diff --git a/structure/metrics/edge_overlap.py b/structure/metrics/edge_overlap.py new file mode 100644 index 0000000..1b27f55 --- /dev/null +++ b/structure/metrics/edge_overlap.py @@ -0,0 +1,43 @@ +#### +## +## Compute the edge overlap of each edge of the multiplex, i.e. the +## number of times that each edge appears in the multiplex +## +## + +import sys + + +if len(sys.argv) < 2: + print "Usage: %s <layer1> [<layer2>...]" % sys.argv[0] + sys.exit(1) + +max_N = -1 + +all_edges = {} + +layer_ID = -1 + +for layer in sys.argv[1:]: + layer_ID += 1 + with open(layer, "r") as lines: + for l in lines: + if l[0] == "#": + continue + s, d = [int(x) for x in l.strip(" \n").split(" ")[:2]] + if s > d: + tmp = s + s = d + d = tmp + if (s,d) in all_edges: + all_edges[(s,d)].append(layer_ID) + else: + all_edges[(s,d)] = [layer_ID] + +K = len(all_edges.keys()) + +for k in all_edges.keys(): + val = len(set(all_edges[(k)])) + s,d = k + print s, d, val + diff --git a/structure/metrics/intersect_layers.py b/structure/metrics/intersect_layers.py new file mode 100644 index 0000000..a9da00c --- /dev/null +++ b/structure/metrics/intersect_layers.py @@ -0,0 +1,47 @@ +#### +## +## Take a certain number of networks as input and give as output the +## corresponding intersection graph, i.e. 
the graph in which an edge
+## exists between i and j if and only if it exists in ALL the graphs
+## given as input
+##
+
+
+import sys
+
+
+if len(sys.argv)< 3:
+    print "Usage: %s <file1> <file2> [<file3>....]" % sys.argv[0]
+    sys.exit(1)
+
+
+graphs = {}
+
+num = 0
+
+for fname in sys.argv[1:]:
+
+    lines = open(fname).readlines()
+    graphs[num] = []
+    for l in lines:
+        s,d = [int(x) for x in l.strip(" \n").split(" ")][:2]
+        if s > d:
+            graphs[num].append((d,s))
+        else:
+            graphs[num].append((s,d))
+    num += 1
+
+
+for edge in graphs[0]:
+    to_print = True
+    for i in range(1, num):
+        if edge not in graphs[i]:
+            to_print = False
+            break
+    if to_print:
+        s,d = edge
+        print s,d
+
diff --git a/structure/metrics/overlap_degree.py b/structure/metrics/overlap_degree.py
new file mode 100644
index 0000000..fa69434
--- /dev/null
+++ b/structure/metrics/overlap_degree.py
@@ -0,0 +1,47 @@
+####
+##
+## Compute the overlapping degree for each node and the corresponding
+## z-score
+##
+##
+
+import sys
+import numpy
+
+
+if len(sys.argv) < 2:
+    print "Usage: %s <layer1> <layer2> [<layer3>...]" % sys.argv[0]
+    sys.exit(1)
+
+
+nodes = {}
+
+for f in sys.argv[1:]:
+
+    lines = open(f).readlines()
+
+    for l in lines:
+        if l[0] == "#":
+            continue
+        s,d = [int(x) for x in l.strip(" \n").split(" ")]
+        if nodes.has_key(s):
+            nodes[s] +=1
+        else:
+            nodes[s] = 1
+        if nodes.has_key(d):
+            nodes[d] +=1
+        else:
+            nodes[d] = 1
+
+
+degrees = nodes.values()
+avg_deg = numpy.mean(degrees)
+std_deg = numpy.std(degrees)
+
+keys = nodes.keys()
+keys.sort()
+
+for n in keys:
+    print n, nodes[n], (nodes[n] - avg_deg)/std_deg
diff --git a/structure/reinforcement/reinforcement.py b/structure/reinforcement/reinforcement.py
new file mode 100644
index 0000000..5939e59
--- /dev/null
+++ b/structure/reinforcement/reinforcement.py
@@ -0,0 +1,61 @@
+import networkx as nx
+import sys
+
+
+if __name__ == "__main__":
+
+
+    if len(sys.argv) < 6:
+        print "Usage: %s <layer1> <layer2> <N bins> <min_value> <max_value>" % sys.argv[0]
+        sys.exit(1)
+
+
+    filename_a = sys.argv[1]
+    filename_b = sys.argv[2]
+
+    intervals = int(sys.argv[3])
+    minvalue = float(sys.argv[4])
+    maxvalue = float(sys.argv[5])
+
+
+    tot_a = []
+    pos_a = []
+    for t in range(intervals):
+        tot_a.append(0)
+        pos_a.append(0)
+
+
+    fa = open(filename_a, 'r')
+    Ga = nx.read_adjlist(fa)
+
+
+    fb = open(filename_b, 'r')
+    Gb = nx.read_weighted_edgelist(fb)
+
+
+    for u,v in Gb.edges():
+        Gbw = Gb[u][v]['weight']
+        for i in range(intervals):
+            a = minvalue + float(maxvalue-minvalue)*float(i)/intervals
+            b = minvalue + float(maxvalue-minvalue)*float(i+1)/intervals
+            if (Gbw > a and Gbw < b):
+                tot_a[i] += 1
+                if Ga.has_edge(u,v):
+                    pos_a[i] += 1
+                break
+
+    freq_a = []
+    for i in range(intervals):
+        if (tot_a[i] > 0):
+            freq_a.append(float(pos_a[i])/tot_a[i])
+        else:
+            freq_a.append(0)
+    print "#bin_minvalue bin_maxvalue frequency"
+    for i in range(intervals):
+        a = minvalue + float(maxvalue-minvalue)*float(i)/intervals
+        b = minvalue + float(maxvalue-minvalue)*float(i+1)/intervals
+        print a, b, freq_a[i]
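A toy check of the weight-binning step used in reinforcement.py above (parameter values assumed); note that the strict inequalities mean a weight falling exactly on a bin edge is assigned to no bin:

    # Four bins over [0, 8]: (0,2), (2,4), (4,6), (6,8).
    intervals, minvalue, maxvalue = 4, 0.0, 8.0
    Gbw = 3.5                          # weight of one hypothetical layer-b edge
    for i in range(intervals):
        a = minvalue + (maxvalue - minvalue) * float(i) / intervals
        b = minvalue + (maxvalue - minvalue) * float(i + 1) / intervals
        if Gbw > a and Gbw < b:
            print i, a, b              # -> 1 2.0 4.0
            break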