diff options
| field | value | date |
|---|---|---|
| author | KatolaZ <katolaz@yahoo.it> | 2015-10-19 16:23:00 +0100 |
| committer | KatolaZ <katolaz@yahoo.it> | 2015-10-19 16:23:00 +0100 |
| commit | df8386f75b0538075d72d52693836bb8878f505b (patch) | |
| tree | 704c2a0836f8b9fd9f470c12b6ae05637c431468 /structure | |
| parent | 363274e79eade464247089c105260bc34940da07 (diff) | |
First commit of MAMMULT code
Diffstat (limited to 'structure')
33 files changed, 2267 insertions, 0 deletions
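All of the scripts introduced by this commit consume plain-text edge lists, one file per layer: lines beginning with `#` are comments, and only the first two space-separated fields of each line are used, as integer node IDs numbered sequentially from 0 with no gaps. As a minimal sketch of that shared parsing convention (the file name `layer1.txt` is only an illustrative example, not part of the commit):

    ## Read one edge-list layer: skip comment lines and keep the first
    ## two integer fields of each line (extra fields, e.g. weights,
    ## are ignored, as in the scripts below). "layer1.txt" is a
    ## hypothetical example file.
    with open("layer1.txt", "r") as lines:
        for l in lines:
            if l[0] == "#":
                continue
            s, d = [int(x) for x in l.strip(" \n").split(" ")[:2]]
            print s, d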
diff --git a/structure/activity/degs_to_activity_overlap.py b/structure/activity/degs_to_activity_overlap.py new file mode 100644 index 0000000..c96959b --- /dev/null +++ b/structure/activity/degs_to_activity_overlap.py @@ -0,0 +1,36 @@ +#### +## +## Take a file which contains, on the n-th line, the degrees at each +## layer of the n-th node, and print on output the corresponding node +## multi-activity (i.e., the number of layers in which the node is +## active) and the overlapping degree (i.e., the total number of edges +## incident on the node) +## +## + + +import sys + +def to_binary(l): +    s = 0 +    e = 0 +    for v in l: +        s += v * pow(2,e) +        e += 1 +    return s + +if len(sys.argv) < 2: +    print "Usage: %s <filein>" % sys.argv[0] +    sys.exit(1) + +distr = {} + +with open(sys.argv[1]) as f: +    for l in f: +        elems = [int(x) for x in l.strip(" \n").split(" ")] +        ov = sum(elems) +        new_list = [1 if x>0 else 0 for x in elems] +        multi_act = sum(new_list) +        if multi_act and ov: +            print ov, multi_act +     diff --git a/structure/activity/degs_to_binary.py b/structure/activity/degs_to_binary.py new file mode 100644 index 0000000..cb7aef5 --- /dev/null +++ b/structure/activity/degs_to_binary.py @@ -0,0 +1,45 @@ +#### +## +## Take a file which contains, on the n-th line, the degrees at each +## layer of the n-th node, and print on output the corresponding node +## participation bit-strings, i.e. the string which contains "1" if on +## that layer the node is connected, and zero otherwise +## +## on the stderr, we also dump the distribution of each bit-string +## + + +import sys + +def to_binary(l): +    s = 0 +    e = 0 +    for v in l: +        s += v * pow(2,e) +        e += 1 +    return s + +if len(sys.argv) < 2: +    print "Usage: %s <filein>" % sys.argv[0] +    sys.exit(1) + +distr = {} + +with open(sys.argv[1]) as f: +    for l in f: +        elems = [int(x) for x in l.strip(" \n").split(" ")] +        new_list = [1 if x>0 else 0 for x in elems] +        val = to_binary(new_list) +        if val in distr: +            distr[val] += 1 +        else: +            distr[val] = 1 +        for i in new_list: +            print i, +        print + +for k in distr: +    bin_list = bin(k) +    bin_num = sum([int(x) if x=='1' else 0 for x in bin_list[2:]]) +    sys.stderr.write("%d %28s %d \n" % (bin_num, bin_list[2:], distr[k])) +     diff --git a/structure/activity/hamming_dist.py b/structure/activity/hamming_dist.py new file mode 100644 index 0000000..2a98e64 --- /dev/null +++ b/structure/activity/hamming_dist.py @@ -0,0 +1,41 @@ +#### +## +## Take as input the layers of a multiplex, and print, for each pair +## of layers, the normalized Hamming distance between the sets of +## nodes which are active on the two layers.
+## +## + + +import sys + +if len(sys.argv) < 3: +    print "Usage: %s <layer1> <layer2> [<layer3>...]" % sys.argv[0] +    sys.exit(1) + +max_N = -1 + +layers = [] + + +for layer in sys.argv[1:]: +    active = [] +    with open(layer, "r") as lines: +        for l in lines: +            if l[0] == "#": +                continue +             +            s, d = [int(x) for x in l.strip(" \n").split(" ")[:2]] +            if s > max_N: +                max_N = s +            if d > max_N: +                max_N = d +            active.extend([s,d]) +        active = set(active) +        layers.append(active) + +for i in range(len(layers)): +    for j in range(i+1, len(layers)): +        s = layers[i] ^ layers[j] +        print i, j, len(s)*1.0 / min(len(layers[i]) + len(layers[j]), max_N+1) + diff --git a/structure/activity/layer_activity.py b/structure/activity/layer_activity.py new file mode 100644 index 0000000..bea0416 --- /dev/null +++ b/structure/activity/layer_activity.py @@ -0,0 +1,27 @@ +#### +## +## Take as input the layers of a multiplex, and provide as output a +## file where the n-th line contains the activity of the n-th +## layer. +## +## + + +import sys + +if len(sys.argv) < 2: +    print "Usage: %s <layer1> [<layer2>...]" % sys.argv[0] +    sys.exit(1) + + +for layer in sys.argv[1:]: +    active = [] +    with open(layer, "r") as lines: +        for l in lines: +            if l[0] == "#": +                continue +             +            s, d = [int(x) for x in l.strip(" \n").split(" ")[:2]] +            active.extend([s,d]) +        active = set(active) +        print len(active) diff --git a/structure/activity/layer_activity_vectors.py b/structure/activity/layer_activity_vectors.py new file mode 100644 index 0000000..f32418d --- /dev/null +++ b/structure/activity/layer_activity_vectors.py @@ -0,0 +1,44 @@ +#### +## +## Take as input the layers of a multiplex, and provide as output a +## file where the n-th line contains the activity vector of the n-th +## layer, i.e. one 0/1 entry for each node. +## +## + + +import sys + +if len(sys.argv) < 2: +    print "Usage: %s <layer1> [<layer2>...]" % sys.argv[0] +    sys.exit(1) + +max_N = -1 + +layers = [] + + +for layer in sys.argv[1:]: +    active = [] +    with open(layer, "r") as lines: +        for l in lines: +            if l[0] == "#": +                continue +             +            s, d = [int(x) for x in l.strip(" \n").split(" ")[:2]] +            if s > max_N: +                max_N = s +            if d > max_N: +                max_N = d +            active.extend([s,d]) +        active = set(active) +        layers.append(active) + +for l in layers: +    for n in range(max_N+1): +        if n in l: +            print 1, +        else: +            print 0, +    print + diff --git a/structure/activity/multiplexity.py b/structure/activity/multiplexity.py new file mode 100644 index 0000000..9e038c9 --- /dev/null +++ b/structure/activity/multiplexity.py @@ -0,0 +1,41 @@ +#### +## +## Take as input the layers of a multiplex, and print, for each pair +## of layers, the pairwise multiplexity, i.e. the fraction of nodes +## which are active on both layers.
+## +## + + +import sys + +if len(sys.argv) < 3: +    print "Usage: %s <layer1> <layer2> [<layer3>...]" % sys.argv[0] +    sys.exit(1) + +max_N = -1 + +layers = [] + + +for layer in sys.argv[1:]: +    active = [] +    with open(layer, "r") as lines: +        for l in lines: +            if l[0] == "#": +                continue +             +            s, d = [int(x) for x in l.strip(" \n").split(" ")[:2]] +            if s > max_N: +                max_N = s +            if d > max_N: +                max_N = d +            active.extend([s,d]) +        active = set(active) +        layers.append(active) + +for i in range(len(layers)): +    for j in range(i+1, len(layers)): +        s = layers[i] & layers[j] +        print i, j, len(s)*1.0 / (max_N+1) + diff --git a/structure/activity/node_activity.py b/structure/activity/node_activity.py new file mode 100644 index 0000000..6410a68 --- /dev/null +++ b/structure/activity/node_activity.py @@ -0,0 +1,51 @@ +#### +## +## Take as input the layers of a multiplex, and provide as output a +## file where the n-th line contains the activity of the n-th node. We +## assume that nodes are numbered sequentially, starting from 0, with +## no gaps (i.e., missing nodes are treated as isolated nodes) +## +## +## + + +import sys + +if len(sys.argv) < 2: +    print "Usage: %s <layer1> [<layer2>...]" % sys.argv[0] +    sys.exit(1) + +node_activity = {} + +max_N = -1 + +for layer in sys.argv[1:]: +    active = [] +    with open(layer, "r") as lines: +        for l in lines: +            if l[0] == "#": +                continue +             +            s, d = [int(x) for x in l.strip(" \n").split(" ")[:2]] +            active.extend([s,d]) +            if s > max_N: +                max_N = s +            if d > max_N: +                max_N = d +        active = set(active) +        for n in active: +            if n in node_activity: +                node_activity[n] += 1 +            else: +                node_activity[n] = 1 +     + +for n in range(max_N+1): +    if n in node_activity: +        print node_activity[n] +    else: +        print 0 + + + + diff --git a/structure/activity/node_activity_vectors.py b/structure/activity/node_activity_vectors.py new file mode 100644 index 0000000..9839334 --- /dev/null +++ b/structure/activity/node_activity_vectors.py @@ -0,0 +1,68 @@ +#### +## +## Take as input the layers of a multiplex, and provide as output a +## file where the n-th line contains the activity (0/1) of the n-th +## node at each layer, separated by a space, in the format: +## +## node1_actlay1 node1_actlay2 .... node1_actlayM +## node2_actlay1 node2_actlay2 .... node2_actlayM +## .............................................. +## nodeN_actlay1 nodeN_actlay2 .... nodeN_actlayM +## +## + +import sys + +if len(sys.argv) < 2: +    print "Usage: %s <layer1> [<layer2>...]" % sys.argv[0] +    sys.exit(1) + +node_degrees = {} + +max_N = -1 + +num_layer = 0 + +for layer in sys.argv[1:]: +    with open(layer, "r") as lines: +        for l in lines: +            if l[0] == "#": +                continue +             +            s, d = [int(x) for x in l.strip(" \n").split(" ")[:2]] + +            if s > max_N: +                max_N = s +            if d > max_N: +                max_N = d + +            if s in node_degrees: +                if num_layer in node_degrees[s]: +                    node_degrees[s][num_layer] += 1 +                else: +                    node_degrees[s][num_layer] = 1 +            else: +                node_degrees[s] = {} +                node_degrees[s][num_layer] = 1 + +            if d in node_degrees: +                if num_layer in node_degrees[d]: +                    node_degrees[d][num_layer] += 1 +                else: +                    node_degrees[d][num_layer] = 1 +            else: +                node_degrees[d] = {} +                node_degrees[d][num_layer] = 1 +    num_layer += 1 + + +for n in range(max_N+1): +    for i in range(num_layer): +        if n in node_degrees: +            if i in node_degrees[n]: +                print 1, +            else: +                print 0, +        else: +            print 0, +    print diff --git a/structure/activity/node_degree_vectors.py b/structure/activity/node_degree_vectors.py new file mode 100644 index 0000000..8863daa --- /dev/null +++ b/structure/activity/node_degree_vectors.py @@ -0,0 +1,68 @@ +#### +## +## Take as input the layers of a multiplex, and provide as output a +## file where the n-th line contains the degrees of the n-th node at +## each layer, separated by a space, in the format: +## +## node1_deglay1 node1_deglay2 .... node1_deglayM +## node2_deglay1 node2_deglay2 .... node2_deglayM +## .............................................. +## nodeN_deglay1 nodeN_deglay2 .... 
nodeN_deglayM +## +## + +import sys + +if len(sys.argv) < 2: +    print "Usage: %s <layer1> [<layer2>...]" % sys.argv[0] +    sys.exit(1) + +node_degrees = {} + +max_N = -1 + +num_layer = 0 + +for layer in sys.argv[1:]: +    with open(layer, "r") as lines: +        for l in lines: +            if l[0] == "#": +                continue +             +            s, d = [int(x) for x in l.strip(" \n").split(" ")[:2]] + +            if s > max_N: +                max_N = s +            if d > max_N: +                max_N = d + +            if s in node_degrees: +                if num_layer in node_degrees[s]: +                    node_degrees[s][num_layer] += 1 +                else: +                    node_degrees[s][num_layer] = 1 +            else: +                node_degrees[s] = {} +                node_degrees[s][num_layer] = 1 + +            if d in node_degrees: +                if num_layer in node_degrees[d]: +                    node_degrees[d][num_layer] += 1 +                else: +                    node_degrees[d][num_layer] = 1 +            else: +                node_degrees[d] = {} +                node_degrees[d][num_layer] = 1 +    num_layer += 1 + + +for n in range(max_N+1): +    for i in range(num_layer): +        if n in node_degrees: +            if i in node_degrees[n]: +                print node_degrees[n][i], +            else: +                print 0, +        else: +            print 0, +    print diff --git a/structure/correlations/Makefile b/structure/correlations/Makefile new file mode 100644 index 0000000..58301cb --- /dev/null +++ b/structure/correlations/Makefile @@ -0,0 +1,15 @@ +CFLAGS="-O3" +CC="gcc" +GSL_FLAGS=-lgsl -lgslcblas -lm +MFLAG=-lm + +all: dump_k_q fit_knn + +fit_knn: fit_knn.c fit_utils.c +	$(CC) $(CFLAGS) -o fit_knn fit_knn.c fit_utils.c $(GSL_FLAGS) + +dump_k_q: dump_k_q.c rank_utils.c +	$(CC) $(CFLAGS) -o dump_k_q dump_k_q.c rank_utils.c $(MFLAG)  + +clean: +	rm dump_k_q fit_knn diff --git a/structure/correlations/compute_pearson.py b/structure/correlations/compute_pearson.py new file mode 100644 index 0000000..e2c9055 --- /dev/null +++ b/structure/correlations/compute_pearson.py @@ -0,0 +1,33 @@ +#### +## +## Compute the pearson correlation coefficient between the values of +## node properties included in the two files provided as input. +## + +import sys +import numpy +import scipy.stats +import math + +if len(sys.argv) < 3: +    print "Usage %s <file1> <file2>" % sys.argv[0] +    sys.exit(1) + + +x1 = [] + +with open(sys.argv[1], "r") as lines: +    for l in lines: +        elem = [float(x) if "." in x or "e" in x else int(x) for x in l.strip(" \n").split()][0] +        x1.append(elem) + +x2 = [] + +with open(sys.argv[2], "r") as lines: +    for l in lines: +        elem = [float(x) if "." 
in x or "e" in x else int(x) for x in l.strip(" \n").split()][0] +        x2.append(elem) + +     +r2 =numpy.corrcoef(x1,x2) +print r2[0][1] diff --git a/structure/correlations/compute_rho.py b/structure/correlations/compute_rho.py new file mode 100644 index 0000000..8fa4174 --- /dev/null +++ b/structure/correlations/compute_rho.py @@ -0,0 +1,118 @@ +#### +## +## Get two rankings and a pairing, and compute the corresponding +## Spearman's rho rank correlation coefficient +## +## +## +## + +import sys +import random +import math +#import scipy.stats + +## +## Compute the constant C of the Spearman's rho correlation coefficient in the draft +##  +def compute_C(rank1, rank2): +    N = len(rank1) +    [mu1, mu2] = [1.0 * sum(x.values()) / len(x.values()) for x in [rank1, rank2]] +    [sum1, sum2] = [1.0 * sum(x.values()) for x in [rank1, rank2]] +    C = N * mu1 * mu2 - mu2 * sum1 - mu1 * sum2 + +    #print mu1, mu2, sum1, sum2, C + +    return C + + +## +## Compute the constant D of the Spearman's rho correlation coefficient in the draft +##  +def compute_D(rank1, rank2): +    [mu1, mu2] = [1.0 * sum(x.values()) / len(x.values()) for x in [rank1, rank2]] +    s1 = sum([math.pow((x-mu1), 2) for x in rank1.values()]) +    s2 = sum([math.pow((x-mu2), 2) for x in rank2.values()]) +     +    D = math.sqrt(s1 * s2) +    return D + +def compute_rho(rank1, rank2, pairing, C, D): + +    rho = 0 +    for s,t in pairing: +        rho += rank1[s] * rank2[t] +    rho = (rho + C) / D +    return rho + +if len(sys.argv) < 3: +    print "Usage: %s <rank1> <rank2> [<pairing>]" % sys.argv[0] +    sys.exit(1) + +rank1 = {} +rank2 = {} + +lines = open(sys.argv[1], "r").readlines() + +i = 0 +for l in lines: +    if l[0] == "#" or l.strip(" \n").split(" ") == []: ## Strip comments and empty lines +        continue +    r = [float(x) if "." in x or "e" in x else int(x) for x in l.strip(" \n").split(" ")][0] +    rank1[i] = r +    i += 1 + + +lines = open(sys.argv[2], "r").readlines() + +i = 0 +for l in lines: +    if l[0] == "#" or l.strip(" \n").split(" ") == []: ## Strip comments and empty lines +        continue +    r = [float(x) if "." in x or "e" in x else int(x) for x in l.strip(" \n").split(" ")][0] +    rank2[i] = r +    i += 1 + + +N1 = len(rank1) +N2 = len(rank2) + +if (N1 != N2): +    print "The two files must have the same number of nodes!!!!!" +    sys.exit(2) + + + +C = compute_C(rank1, rank2) +D = compute_D(rank1, rank2) + + +## We start from a random pairing, and compute the corresponding value of rho + +pairing = [] + +if len(sys.argv) > 3: +    lines = open(sys.argv[3], "r").readlines() +     +     + +    for l in lines: +        if l[0] == "#" or l.strip(" \n").split(" ") == []: +            continue +        p1, p2 = [int(x) for x in l.strip(" \n").split(" ")][:2] +        pairing.append((p1, p2)) + +else: +    for i in range (N1): +        pairing.append((i,i)) + + +if len(pairing) != N1: +    print "ERROR !!! The pairing should be of the same length of the ranking files!!!" 
+    sys.exit(1) + +rho = compute_rho(rank1, rank2, pairing, C, D) + +print rho + + diff --git a/structure/correlations/compute_tau.py b/structure/correlations/compute_tau.py new file mode 100644 index 0000000..3d1f804 --- /dev/null +++ b/structure/correlations/compute_tau.py @@ -0,0 +1,133 @@ +#### +## +## Take as input two files, whose n^th line contains the ranking of +## element n, and compute the Kendall's \tau_b rank correlation +## coefficient +## +## + +import sys +from numpy import * + + +def kendalltau(x,y): +    initial_sort_with_lexsort = True # if True, ~30% slower (but faster under profiler!) but with better worst case (O(n log(n)) than (quick)sort (O(n^2)) +    n = len(x) +    temp = range(n) # support structure used by mergesort +    # this closure recursively sorts sections of perm[] by comparing  +    # elements of y[perm[]] using temp[] as support +    # returns the number of swaps required by an equivalent bubble sort +    def mergesort(offs, length): +        exchcnt = 0 +        if length == 1: +            return 0 +        if length == 2: +            if y[perm[offs]] <= y[perm[offs+1]]: +                return 0 +            t = perm[offs] +            perm[offs] = perm[offs+1] +            perm[offs+1] = t +            return 1 +        length0 = length / 2 +        length1 = length - length0 +        middle = offs + length0 +        exchcnt += mergesort(offs, length0) +        exchcnt += mergesort(middle, length1) +        if y[perm[middle - 1]] < y[perm[middle]]: +            return exchcnt +        # merging +        i = j = k = 0 +        while j < length0 or k < length1: +            if k >= length1 or (j < length0 and y[perm[offs + j]] <= y[perm[middle + k]]): +                temp[i] = perm[offs + j] +                d = i - j +                j += 1 +            else: +                temp[i] = perm[middle + k] +                d = (offs + i) - (middle + k) +                k += 1 +            if d > 0: +                exchcnt += d; +            i += 1 +        perm[offs:offs+length] = temp[0:length] +        return exchcnt +     +    # initial sort on values of x and, if tied, on values of y +    if initial_sort_with_lexsort: +        # sort implemented as mergesort, worst case: O(n log(n)) +        perm = lexsort((y, x)) +    else: +        # sort implemented as quicksort, 30% faster but with worst case: O(n^2) +        perm = range(n) +        perm.sort(lambda a,b: cmp(x[a],x[b]) or cmp(y[a],y[b])) +     +    # compute joint ties +    first = 0 +    t = 0 +    for i in xrange(1,n): +        if x[perm[first]] != x[perm[i]] or y[perm[first]] != y[perm[i]]: +            t += ((i - first) * (i - first - 1)) / 2 +            first = i +    t += ((n - first) * (n - first - 1)) / 2 +     +    # compute ties in x +    first = 0 +    u = 0 +    for i in xrange(1,n): +        if x[perm[first]] != x[perm[i]]: +            u += ((i - first) * (i - first - 1)) / 2 +            first = i +    u += ((n - first) * (n - first - 1)) / 2 +     +    # count exchanges  +    exchanges = mergesort(0, n) +    # compute ties in y after mergesort with counting +    first = 0 +    v = 0 +    for i in xrange(1,n): +        if y[perm[first]] != y[perm[i]]: +            v += ((i - first) * (i - first - 1)) / 2 +            first = i +    v += ((n - first) * (n - first - 1)) / 2 +     +    tot = (n * (n - 1)) / 2 +    if tot == u and tot == v: +        return 1    # Special case for all ties in both ranks +     +    tau = ((tot-(v+u-t)) - 2.0 * exchanges) / (sqrt(float(( tot - u )) 
* float( tot - v ))) +     +    # what follows reproduces ending of Gary Strangman's original stats.kendalltau() in SciPy +    svar = (4.0*n+10.0) / (9.0*n*(n-1)) +    z = tau / sqrt(svar) +    ##prob = erfc(abs(z)/1.4142136) +    ##return tau, prob +    return tau + +def main(): + +    if len(sys.argv) < 3: +        print "Usage: %s <file1> <file2>" % sys.argv[0] +        sys.exit(1) + +    x1 = [] +    x2= [] + +    lines = open(sys.argv[1]).readlines() + +    for l in lines: +        elem = [float(x) if "e" in x or "." in x else int(x) for x in l.strip(" \n").split()][0] +        x1.append(elem) + +    lines = open(sys.argv[2]).readlines() + +    for l in lines: +        elem = [float(x) if "e" in x or "." in x else int(x) for x in l.strip(" \n").split()][0] +        x2.append(elem) +     + +    tau = kendalltau(x1,x2) +    print tau + + +if  __name__ == "__main__": +    main() diff --git a/structure/correlations/dump_k_q.c b/structure/correlations/dump_k_q.c new file mode 100644 index 0000000..2b6ba79 --- /dev/null +++ b/structure/correlations/dump_k_q.c @@ -0,0 +1,50 @@ +/** + * + * Get the degree distributions of two layers and a pairing, and dump + * on output the pairs k-q  + * + * + */ + +#include <stdio.h> +#include <stdlib.h> + +void dump_k_q(double *R1, double *R2, int N, int *pairing){ + +  int i; + +  for (i=0; i<N; i++){ +    printf("%g %g\n", R1[i], R2[pairing[i]]); +  } +} + + +int main(int argc, char *argv[]){ + +  int N1, N2; +  double *R1 = NULL; +  double *R2 = NULL; +   +  int *pairing = NULL; + + +  if (argc < 4){ +    printf("Usage: %s <degs1> <degs2> <pairing>\n", argv[0]); +    exit(1); +  } + +  load_ranking(argv[1], &N1, &R1); +  load_ranking(argv[2], &N2, &R2); +   +  if (N1 != N2){ +    printf("Error!!!! The two files must have the same number of lines!!!! 
Exiting...\n"); +    exit(1); +  } +   +  pairing = malloc(N1 * sizeof(int)); +   +  load_pairing(&pairing, N1, argv[3]); +   +  dump_k_q(R1, R2, N1, pairing); + } diff --git a/structure/correlations/fit_knn.c b/structure/correlations/fit_knn.c new file mode 100644 index 0000000..64fc4c3 --- /dev/null +++ b/structure/correlations/fit_knn.c @@ -0,0 +1,60 @@ +/** + * + * Take a file which contains pairs in the form: + * + * k q + * + * compute qnn(k) and fit this function with a power-law + * + * a*x^{b} + * + * The fit is actually performed as a linear fit on the + * exponential-binned log-log distribution + * + * + */ + + +#include <stdio.h> +#include <stdlib.h> +#include <math.h> + +#include <gsl/gsl_fit.h> +#include "fit_utils.h" + + + +int main(int argc, char *argv[]){ + +  double *k, *q, *x, *y; +  int N, num; +  double alpha; + +  double c0, c1, cov00, cov01, cov11, sqsum; + +  k=q=x=y=NULL; + +  if (argc < 3){ +    printf("%s <filein> <alpha>\n", argv[0]); +    exit(1); +  } +   +  alpha=atof(argv[2]); +   +  fprintf(stderr,"Alpha: %g\n", alpha); +   +  load_sequence_col(argv[1], &k, &N, 0); +  load_sequence_col(argv[1], &q, &N, 1); +  //exit(1); +  exp_bin_avg(k, q, N, alpha, &x, &y, &num); +  //normalize_distr(x, y, num); +  //dump_distr(x, y, num); +  compact_distr(x, y, &num); +  //  dump_distr(x,y,num); +  map_vec(x, num, log); +  map_vec(y, num, log); +  //dump_distr(x,y,num); + +  gsl_fit_linear(x, 1, y, 1, num, &c0, &c1, &cov00, &cov01, &cov11, &sqsum); +  fprintf(stderr, "a: %g b: %g\n", exp(c0), c1); +} diff --git a/structure/correlations/fit_utils.c b/structure/correlations/fit_utils.c new file mode 100644 index 0000000..e9e3954 --- /dev/null +++ b/structure/correlations/fit_utils.c @@ -0,0 +1,383 @@ +/** + * + * Fit a given sequence with a power-law function + * + * a*x^{b} + * + * The fit is actually performed as a linear fit on the + * exponential-binned log-log distribution + * + * + */ + + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> + +#include <gsl/gsl_fit.h> + +/** + * + * Load a sequence from a file, which contains one element on each line + * + */ + +void load_sequence(char *fname, double **v, int *N){ + +  int size; +  char buff[256]; +   +  FILE *f; +   +  f = fopen(fname, "r"); +  if (!f){ +    fprintf(stderr, "Error opening file %s!!!! Exiting...\n", fname); +    exit(1); +  } +   +  *N =0; +  size = 10; +  if (*v == NULL) +    *v = malloc(size * sizeof(double)); +  else{ +    *v = realloc(*v, size * sizeof(double)); +  } +   +  while (fgets(buff, 255, f)){ +    (*v)[*N] = atof(buff); +    *N += 1; +    if (*N == size){ +      size += 10; +      *v = realloc(*v, size * sizeof(double)); +    } +  } +  *v = realloc(*v, (*N) * sizeof(double)); +  fclose(f); +} + + +/** + * + * Load a sequence, getting the "col"-th column of the input file + * + */ + +void load_sequence_col(char *fname, double **v, int *N, int col){ + +  int size, n; +  char buff[256]; +  char *ptr; + +  FILE *f; +   +  f = fopen(fname, "r"); +  if (!f){ +    fprintf(stderr, "Error opening file %s!!!! 
Exiting...\n", fname); +    exit(1); +  } +   +  *N =0; +  size = 10; +  if (*v == NULL) +    *v = malloc(size * sizeof(double)); +  else{ +    *v = realloc(*v, size * sizeof(double)); +  } +   +  while (fgets(buff, 255, f)){ +    ptr = strtok(buff, " "); +    if (col > 0){ +      n = 0; +      while (n<col){ +        ptr = strtok(NULL, " "); +        n += 1; +      } +    } +    (*v)[*N] = atof(ptr); +    *N += 1; + +    if (*N == size){ +      size += 10; +      *v = realloc(*v, size * sizeof(double)); +    } +  } +  *v = realloc(*v, (*N) * sizeof(double)); +  fclose(f); +} + + +/** + * + * Make the exponential binning of a distribution, with a giving + * exponent alpha. The value of y[i] is the number of elements of v + * whose value lies between x[i-1] and x[i]... + * + */ + +void exp_bin_cnt(double *v, int N, double alpha, double **x, double **y, int *num){ +   +  double min_v, max_v, val, last_val; +  int i, j, size, num_x; +  double last_size, new_size; +   +  min_v = max_v = v[0]; +   +  for (i=1; i<N; i ++){ +    if (v[i] > max_v) +      max_v = v[i]; +    else if (v[i] > 0 && v[i] < min_v) +      min_v = v[i]; +  } + +  size = 10; +  if (*x == NULL){ +    *x = malloc(size * sizeof(double)); +  } +  else{ +    *x = realloc(*x, size * sizeof(double)); +  } +   +  val = min_v; +  last_size = min_v; +  (*x)[0] = min_v; +  num_x = 1; + +  while(val < max_v){ +    new_size = last_size * alpha; +    val = last_size + new_size; +    last_size = new_size; +    last_val = val; +    (*x)[num_x] = val; +    num_x += 1; +    if (num_x == size){ +      size += 10; +      *x = realloc(*x, size * sizeof(double)); +    } +  } +   +  if (*y == NULL){ +    *y = malloc(num_x * sizeof(double)); +  } +  else{ +    *y = realloc(*y, num_x * sizeof(double)); +  } +  for (i=0; i < num_x; i++){ +    (*y)[i] = 0; +  } + +   + +  for(i=0; i <N; i ++){ +    j = 0; +    while(v[i] > (*x)[j]){ +      j ++; +    } +    (*y)[j] += 1; +  } +  *num = num_x; +} + +/** + * + * Make the exponential binning of a distribution, with a giving + * exponent alpha. The value of y[i] is the average of the values in + * the vector "w" whose corresponding v lies between x[i-1] and  x[i]... + * + */ + + +void exp_bin_avg(double *v, double *w, int N, double alpha, double **x, double **y, int *num){ +   +  double min_v, max_v, val, last_val; +  int i, j, size, num_x; +  double last_size, new_size; +  int *cnt; + + +  min_v = max_v = v[0]; +   +  for (i=1; i<N; i ++){ +    if (v[i] > max_v) +      max_v = v[i]; +    else if (v[i] > 0 && v[i] < min_v) +      min_v = v[i]; +  } + +  size = 10; +  if (*x == NULL){ +    *x = malloc(size * sizeof(double)); +  } +  else{ +    *x = realloc(*x, size * sizeof(double)); +  } +   +  val = min_v; +  last_size = min_v; +  (*x)[0] = min_v; +  num_x = 1; + +  while(val < max_v){ +    new_size = last_size * alpha; +    val = last_size + new_size; +    last_size = new_size; +    last_val = val; +    (*x)[num_x] = val; +    num_x += 1; +    if (num_x == size){ +      size += 10; +      *x = realloc(*x, size * sizeof(double)); +    } +  } +   +   +  cnt = malloc(num_x * sizeof(int)); +   +  if (*y == NULL){ +    *y = malloc(num_x * sizeof(double)); +  } +  else{ +    *y = realloc(*y, num_x * sizeof(double)); +  } +  for (i=0; i < num_x; i++){ +    (*y)[i] = 0; +    cnt[i] = 0; +  } + +  for(i=0; i <N; i ++){ +    j = 0; +    while(j < num_x && v[i] > (*x)[j]){ +      j ++; +    } +    if(j == num_x){ +      printf("Error!!!!! Trying to assing a non-existing bin!!! 
-- fit_utils.c:exp_bin_avg!!!\n"); +      exit(37); +    } +    (*y)[j] += w[i]; +    cnt[j] += 1; +  } +  *num = num_x; +   +  for(i=0; i< num_x; i++){ +    if (cnt[i] > 0){ +      (*y)[i] = (*y)[i] / cnt[i]; +    } +  } +  free(cnt); +} + + +/** + * + * Print a distribution on stdout + * + */ + +void dump_distr(double *x, double *y, int N){ +  int i; +   +  for(i=0; i<N; i ++){ +    printf("%g %g\n", x[i], y[i]); +  } +   +} + + +/** + * Compact a distribution, leaving only the pairs (x_i, y_i) for which + * y_i > 0 + * + */ + +void compact_distr(double *x, double *y, int *num){ +   +  int i, j; +   +  i = j = 0; +  while(j < *num){ +    while(j < *num && y[j] == 0){ +      j ++; +    } +    if (j==*num){ +      break; +    } +    x[i] = x[j]; +    y[i] = y[j]; +    i ++; +    j ++; +  } +  *num = i; +} + + +/** + * + * Apply the function "f" on all the elemnts of a vector, in-place + * + */ + +void map_vec(double *v, int N, double (*f)(double)){ +  int i; +   +  for (i=0; i<N; i ++){ +    v[i] = f(v[i]); +  } +} + + +/** + * + * Normalize a distribution, dividing each y[i] for the width of the + * corresponding bin (i.e., x[i] - x[i-1]) + * + */ +void normalize_distr(double *x, double *y, int num){ +   +  int i; +   +  for(i=1; i<num; i ++){ +    y[i] /= (x[i] - x[i-1]); +  } +} + +/** + * + * take two vectors (k and q) and a pairing, and compute the best + * power-law fit "a*k^{mu}" of qnn(k) + * + */ + +void fit_current_trend(double *k, double *q, int N, int *pairing, double *mu, double *a,  +                       double *corr){ + +  static int  *num; +  static double *q_pair, *x, *y; + +  int i; +  int fit_num; +  double c0, c1, cov00, cov01, cov11, sqsum; +   +  if (q_pair == NULL){ +    q_pair = malloc(N * sizeof(double)); +  } + +  for(i=0; i<N; i++){ +    q_pair[i] = q[pairing[i]]; +  } + +  exp_bin_avg(k, q_pair, N, 1.3, &x, &y, &fit_num); +  //normalize_distr(x,y, fit_num); +  compact_distr(x, y, &fit_num); +   +  map_vec(x, fit_num, log); +  map_vec(y, fit_num, log); +  gsl_fit_linear(x, 1, y, 1, fit_num, &c0, &c1, &cov00, &cov01, &cov11, &sqsum); + +  //fprintf(stderr,"cov00: %g cov01: %g cov11: %g\n", cov00, cov01, cov11); +  //fprintf(stderr, "corr: %g ", cov01 / sqrt(cov00 * cov11)); +  *a = c0; +  *mu = c1; +  *corr = cov01/sqrt(cov00 * cov11); +} + diff --git a/structure/correlations/fit_utils.h b/structure/correlations/fit_utils.h new file mode 100644 index 0000000..183f47c --- /dev/null +++ b/structure/correlations/fit_utils.h @@ -0,0 +1,25 @@ +#ifndef __FIT_UTILS_H__ +#define __FIT_UTILS_H__ + + +void load_sequence(char *fname, double **v, int *N); + +void load_sequence_col(char *fname, double **v, int *N, int col); + +void exp_bin_cnt(double *v, int N, double alpha, double **x, double **y, int *num); + +void exp_bin_avg(double *v, double *w, int N, double alpha, double **x, double **y, int *num); + +void dump_distr(double *x, double *y, int N); + +void compact_distr(double *x, double *y, int *num); + +void map_vec(double *v, int N, double (*f)(double)); + +void normalize_distr(double *x, double *y, int num); + +void fit_current_trend(double *k, double *q, int N, int *pairing,  +                       double *mu, double *a, double *corr); + + +#endif /* __FIT_UTILS_H__ */ diff --git a/structure/correlations/knn_q_from_degrees.py b/structure/correlations/knn_q_from_degrees.py new file mode 100644 index 0000000..5e10bfa --- /dev/null +++ b/structure/correlations/knn_q_from_degrees.py @@ -0,0 +1,49 @@ +#### +## +## +## Take a file with the degree sequence of nodes which 
are active on +## both layers, and compute \overline{k}(q) and \overline{q}(k), +## i.e. the two inter-layer correlation functions, where we call "k" +## the degree on the first column, and "q" the degree on the second +## column +## +## + +import sys + +if len(sys.argv) < 2: +    print "Usage: %s <filein>" % sys.argv[0] +    sys.exit(1) + +qnn_k = {} +knn_q = {} + +with open(sys.argv[1]) as f: +    for l in f: +        k, q = [int(x) for x in l.strip(" \n").split(" ")] +        if qnn_k.has_key(k): +            qnn_k[k].append(q) +        else: +            qnn_k[k] = [q] +        if knn_q.has_key(q): +            knn_q[q].append(k) +        else: +            knn_q[q] = [k] + + + + +keys= qnn_k.keys() +keys.sort() +for k in keys: +    print k, 1.0 * sum(qnn_k[k])/len(qnn_k[k]) + + + + +keys= knn_q.keys() +keys.sort() +for q in keys: +    sys.stderr.write("%d %f\n" % (q, 1.0 * sum(knn_q[q])/len(knn_q[q]))) + + diff --git a/structure/correlations/knn_q_from_layers.py b/structure/correlations/knn_q_from_layers.py new file mode 100644 index 0000000..c108c18 --- /dev/null +++ b/structure/correlations/knn_q_from_layers.py @@ -0,0 +1,98 @@ +#### +## +## +## Compute the degree-degree correlations of a multiplex graph, namely: +## +## <k_1>(k_2) and <k_2>(k_1) +## +## Takes as input the two lists of edges corresponding to each layer +## + +import sys +import numpy as np +import networkx as net + + +def knn(G, n): +    neigh = G.neighbors(n) +    l = G.degree(neigh).values() +    return 1.0 * sum(l) / len(l)  + + +if len(sys.argv) < 2: +    print "Usage: %s <layer1> <layer2>" % sys.argv[0] +    sys.exit(1) + + +G1 = net.read_edgelist(sys.argv[1]) +G2 = net.read_edgelist(sys.argv[2]) + +k1_k1 = {} ## Intraleyer knn (k1) +k2_k2 = {} ## Intralayer knn (k2) +k1_k2 = {} ## Interlayer average degree at layer 1 of a node having degree k_2 in layer 2 +k2_k1 = {} ## Interlayer average degree at layer 2 of a node having degree k_1 in layer 1 + + +for n in G1.nodes(): +    k1 = G1.degree(n) + +     +    ##print k1,k2 + +    knn1 = knn(G1, n) +    if n in G2.nodes(): +        k2 = G2.degree(n) +        knn2 = knn(G2, n) +    else: +        k2 = 0 +        knn2 = 0 + +    if k1_k1.has_key(k1): +        k1_k1[k1].append(knn1) +    else: +        k1_k1[k1] = [knn1] + +    if k2_k2.has_key(k2): +        k2_k2[k2].append(knn2) +    else: +        k2_k2[k2] = [knn2] +         + +    if k1_k2.has_key(k2): +        k1_k2[k2].append(k1) +    else: +        k1_k2[k2] = [k1] + +    if k2_k1.has_key(k1): +        k2_k1[k1].append(k2) +    else: +        k2_k1[k1] = [k2] +         +         +k1_keys = k1_k1.keys() +k1_keys.sort() +k2_keys = k2_k2.keys() +k2_keys.sort() + + +f = open("%s_%s_k1" % (sys.argv[1], sys.argv[2]), "w+") + +for n in k1_keys: +    avg_knn = np.mean(k1_k1[n]) +    std_knn = np.std(k1_k1[n]) +    avg_k2 = np.mean(k2_k1[n]) +    std_k2 = np.std(k2_k1[n]) +    f.write("%d %f %f %f %f\n" % (n, avg_knn, std_knn, avg_k2, std_k2)) + +f.close() + +f = open("%s_%s_k2" % (sys.argv[1], sys.argv[2]), "w+") + +for n in k2_keys: +    avg_knn = np.mean(k2_k2[n]) +    std_knn = np.std(k2_k2[n]) +    avg_k1 = np.mean(k1_k2[n]) +    std_k1 = np.std(k1_k2[n]) +    f.write("%d %f %f %f %f\n" % (n, avg_knn, std_knn, avg_k1, std_k1)) +f.close() + diff --git a/structure/correlations/rank_nodes.py b/structure/correlations/rank_nodes.py new file mode 100644 index 0000000..6985e88 --- /dev/null +++ b/structure/correlations/rank_nodes.py @@ -0,0 +1,74 @@ +#### +## +## +## Get a file as input, whose n-th line corresponds to 
the value of a +## certain property of node n, and rank nodes according to their +## properties, taking into account ranking ties properly. +## +## The output is a file whose n-th line is the "ranking" of the n-th +## node according to the given property. (notice that rankings could +## be fractional, due to the tie removal algorithm) +## +## + + +import sys +import math + + +if len(sys.argv) < 2: +    print "Usage: %s <filein>" % sys.argv[0] +    sys.exit(1) + + + +lines = open(sys.argv[1], "r").readlines() + +ranking = [] + +n=0 +for l in lines: +    if l[0] == "#" or l.strip(" \n").split(" ") == []: +        continue +    v = [float(x) if "." in x or "e" in x else int(x) for x in l.strip(" \n").split(" ")][0] +    ranking.append((v,n)) +    n +=1 + +ranking.sort(reverse=True) +#print ranking +new_ranking = {} + +v0, n0 = ranking[0] + +old_value = v0 +tot_rankings = 1 + +stack = [n0] +l=1.0 +for v,n in ranking[1:]: +    l += 1 +    ##print stack, tot_rankings +    if v != old_value: ### There is a new rank +        # We first compute the rank for all the nodes in the stack and then we set it +        new_rank_value = 1.0 * tot_rankings / len(stack) +        ##print new_rank_value +        for j in stack: +            new_ranking[j] = new_rank_value +        old_value = v +        tot_rankings = l +        stack = [n] +    else: # One more value with the same rank, keep it for the future +        stack.append(n) +        tot_rankings += l + +new_rank_value = 1.0 * tot_rankings / len(stack) +#print new_rank_value +for j in stack: +    new_ranking[j] = new_rank_value + +#print new_ranking + +keys = new_ranking.keys() +keys.sort() +for k in keys: +    print new_ranking[k] diff --git a/structure/correlations/rank_nodes_thresh.py b/structure/correlations/rank_nodes_thresh.py new file mode 100644 index 0000000..44b565a --- /dev/null +++ b/structure/correlations/rank_nodes_thresh.py @@ -0,0 +1,87 @@ +#### +## +## +## Get a file as input, whose n-th line corresponds to the value of a +## certain property of node n, and rank nodes according to their +## properties, taking into account ranking ties properly. +## +## The output is a file whose n-th line is the "ranking" of the n-th +## node according to the given property. (notice that rankings could +## be fractional, due to the tie removal algorithm) +## +## The rank of a node is set to "0" (ZERO) if the corresponding +## property is smaller than a value given as second parameter +## + + +import sys +import math + + +if len(sys.argv) < 3: +    print "Usage: %s <filein> <thresh>" % sys.argv[0] +    sys.exit(1) + + +thresh = float(sys.argv[2]) + +lines = open(sys.argv[1], "r").readlines() + +ranking = [] + +n=0 +for l in lines: +    if l[0] == "#" or l.strip(" \n").split(" ") == []: +        continue +    v = [float(x) if "." 
in x or "e" in x else int(x) for x in l.strip(" \n").split(" ")][0] +    if v >= thresh: +        ranking.append((v,n)) +    else: +        ranking.append((0,n)) +    n +=1 + +ranking.sort(reverse=True) +#print ranking +new_ranking = {} + +v0, n0 = ranking[0] + + +old_value = v0 +tot_rankings = 1 + +stack = [n0] +l=1.0 +for v,n in ranking[1:]: +    l += 1 +    ##print stack, tot_rankings +    if v != old_value: ### There is a new rank +        # We first compute the rank for all the nodes in the stack and then we set it +        if old_value == 0: +            new_rank_value = 0 +        else: +            new_rank_value = 1.0 * tot_rankings / len(stack) +        ##print new_rank_value +        for j in stack: +            new_ranking[j] = new_rank_value +        old_value = v +        tot_rankings = l +        stack = [n] +    else: # One more value with the same rank, keep it for the future +        stack.append(n) +        tot_rankings += l + +if v == 0 : +    new_rank_value = 0 +else: +    new_rank_value = 1.0 * tot_rankings / len(stack) +#print new_rank_value +for j in stack: +    new_ranking[j] = new_rank_value + +#print new_ranking + +keys = new_ranking.keys() +keys.sort() +for k in keys: +    print new_ranking[k] diff --git a/structure/correlations/rank_occurrence.py b/structure/correlations/rank_occurrence.py new file mode 100644 index 0000000..6339d5d --- /dev/null +++ b/structure/correlations/rank_occurrence.py @@ -0,0 +1,45 @@ +#### +## +## Get two rankings and compute the size of the k-intersection, +## i.e. the number of elements which are present in the first k +## positions of both rankings, as a function of k +## +## + + +import sys + + +if len(sys.argv)< 4: +    print "Usage: %s <file1> <file2> <increment>" % sys.argv[0] +    sys.exit(1) + +incr = int(sys.argv[3]) + +rank1 = [] +rank2 = [] + +lines = open(sys.argv[1], "r").readlines() + +for l in lines: +    n= l.strip(" \n").split(" ")[0] +    rank1.append(n) + +lines = open(sys.argv[2], "r").readlines() + +for l in lines: +    n= l.strip(" \n").split(" ")[0] +    rank2.append(n) + + +N = len(rank1) + +i = incr + +while i < N+incr: +    l = len(set(rank1[:i]) & set(rank2[:i])) +    print i, l +    i += incr + + + diff --git a/structure/correlations/rank_utils.c b/structure/correlations/rank_utils.c new file mode 100644 index 0000000..4b8ef41 --- /dev/null +++ b/structure/correlations/rank_utils.c @@ -0,0 +1,217 @@ +/** + * + * Some utility functions to be used with tune_rho, compute_rho and + * the like + * + */ + + +#include <stdio.h> +#include <stdlib.h> +#include <math.h> +#include <string.h> +#include <time.h> + +#include "rank_utils.h" + +void load_ranking(char *fname, int *N, double **R){ +   +  char buff[256]; +  int size = 10; +  FILE *f; +   +  if (*R == NULL){ +    *R = malloc(size * sizeof (double)); +  } +  f = fopen(fname, "r"); +  if (!f){ +    printf("Unable to open file: %s!!! 
Exiting\n", fname); +    exit(1); +  } +   +  *N = 0; +  +  while (fgets(buff, 255, f)){ +    if (*N == size){ +      size += 10; +      *R = realloc(*R, size * sizeof(double)); +    } +    (*R)[*N] = atof(buff); +    *N += 1; +  } +} + +double avg_array(double *v, int N){ +   +  double sum = 0.0; +  int i; +   +  for (i = 0; i < N; i ++){ +    sum += v[i]; +  } +  return sum/N; +} + +double compute_C(double *R1, double *R2, int N){ +  double mu1, mu2, sum1, sum2; + +  mu1 = avg_array(R1, N); +  mu2 = avg_array(R2, N); +   +  sum1 = mu1 * N; +  sum2 = mu2 * N; +   +  return N * mu1 * mu2 - mu2 * sum1 - mu1 * sum2; +} + + +double compute_D(double *R1, double *R2, int N){ +   +  double mu1, mu2, s1, s2; +  int i; + +  mu1 = avg_array(R1, N); +  mu2 = avg_array(R2, N); +   +  s1 = s2 = 0.0; +   +  for (i=0 ; i < N; i ++){ +    s1 += pow((R1[i] - mu1), 2); +    s2 += pow((R2[i] - mu2), 2); +  } +   +  return sqrt(s1 * s2); +} + + +double compute_rho(double *R1, double *R2, int N, int *pairing){ +   +  double rho = 0; +  int i; +   +  for (i=0; i < N; i ++){ +    rho += R1[i] * R2[ pairing[i] ]; +  } + +  rho = (rho + compute_C(R1, R2, N))/ compute_D(R1, R2, N); +  return rho; +} + +void dump_ranking(double *R, int N){ +  int i; + +  for (i=0; i < N; i ++){ +    printf("%d: %f\n", i, R[i] ); +  } +} + + +void init_pairing_natural(int *pairing, int N){ +  int i; + +  for (i = 0; i< N; i ++){ +    pairing[i] = i; +  } +} + +void init_pairing_inverse(int *pairing, int N){ +  int i; + +  for (i = 0; i< N; i ++){ +    pairing[i] = N-i-1; +  } +} + +void select_pairing(int *pairing, int N, int argc, char *argv[], int pos){ + +  if (argc < pos + 1 || !strncasecmp("rnd", argv[pos], 3)){ +    init_pairing_random(pairing, N); +  } +  else if (!strncasecmp("nat", argv[pos], 3)){ +    init_pairing_natural(pairing, N); +  } +  else if (!strncasecmp("inv", argv[pos], 3)){ +    init_pairing_inverse(pairing, N); +  } +  else{ +    printf ("Pairing strategy \"%s\" unknown!!! Exiting...\n", argv[pos]); +    exit(1); +  } + +} + + +void shuffle_sequence(int *s, int N){ +   +  int i, j, tmp; + +  for (i=N-1; i>=0; i--){ +    j = rand() % (i+1); +    tmp = s[j]; +    s[j] = s[i]; +    s[i] = tmp; +  } +} + + + +void init_pairing_random(int *pairing, int N){ + +  init_pairing_natural(pairing, N); +  shuffle_sequence(pairing, N); + +} + +/* Loads a pairing from a file, in the format: + * + * rank1 rank2 + * ........... + */ +void load_pairing(int **pairing, int N, char *fname){ +   +  FILE *f; +  int i, j, num; +  char buff[256]; +  char *ptr; + +  f = fopen(fname, "r"); +  if (!f){ +    printf("Error opening file \"%s\"!!!! 
Exiting....\n", fname); +    exit(2); +  } + +  if (*pairing == NULL){ +    *pairing = malloc(N * sizeof(int)); +    init_pairing_natural(*pairing, N); +  } +   +  num = 0; +  while(num < N){ +    fgets(buff, 255, f); +    if (buff[0] == '#') +      continue; +    ptr = strtok(buff, " "); /* read the first node */ +    i = atoi(ptr); +    ptr = strtok(NULL, " "); /* read the second node */ +    j = atoi(ptr); +    (*pairing)[i] = j; +    num += 1; +  } +} + + +void dump_pairing(int *pairing, int N){ +  int i; + +  for(i=0; i< N; i ++){ +    printf("%d %d\n", i, pairing[i]); +  } +} + +void copy_pairing(int *p1, int *p2, int N){ +  int i; +   +  for (i=0 ; i < N; i ++){ +    p2[i] = p1[i]; +  } +} diff --git a/structure/correlations/rank_utils.h b/structure/correlations/rank_utils.h new file mode 100644 index 0000000..521582a --- /dev/null +++ b/structure/correlations/rank_utils.h @@ -0,0 +1,44 @@ +#ifndef __RANK_UTILS_H__ +#define __RANK_UTILS_H__ + +/* Load a ranking */ +void load_ranking(char *fname, int *N, double **R); + +/* Compute the average of an array */ +double avg_array(double *v, int N); + +/* Compute the term "C" in the rho correlation coefficient */ +double compute_C(double *R1, double *R2, int N); + +/* Compute the term "D" in the rho correlation coefficient */ +double compute_D(double *R1, double *R2, int N); + +/* Compute the Spearman's rank correlation coefficient \rho */ +double compute_rho(double *R1, double *R2, int N, int *pairing); + +void dump_ranking(double *R, int N); + +void init_pairing_natural(int *pairing, int N); + +void init_pairing_inverse(int *pairing, int N); + +void init_pairing_random(int *pairing, int N); + +void select_pairing(int *pairing, int N, int argc, char *argv[], int pos); + +void shuffle_sequence(int *s, int N); + +void dump_pairing(int *pairing, int N); + +void copy_pairing(int *pairing1, int *pairing2, int N); + + +#endif /* __RANK_UTILS_H__ */ + + + + + + + + diff --git a/structure/metrics/aggregate_layers_w.py b/structure/metrics/aggregate_layers_w.py new file mode 100644 index 0000000..cd5ea1d --- /dev/null +++ b/structure/metrics/aggregate_layers_w.py @@ -0,0 +1,37 @@ +#### +##  +## Aggregate the layers of a multiplex, weigthing each edge according +## to the number of times it occurs in the different layers +## + +import sys +import networkx as net + + +if len(sys.argv) < 3: +    print "Usage: %s <file1> <file2> [<file3>....]" % sys.argv(0) +    sys.exit(1) + +G = net.Graph() + +lines = open(sys.argv[1], "r").readlines() + +for l in lines: +    s,d = [int(x) for x in l.strip(" \n").split(" ")[:2]] +    G.add_edge(s,d) +    G[s][d]['weigth'] = 1 + +for f in sys.argv[2:]: +    lines = open(f, "r").readlines() +    for l in lines: +        s,d = [int(x) for x in l.strip(" \n").split(" ")[:2]] +        if (G.has_edge(s,d)): +            G[s][d]['weigth'] += 1 +        else: +            G.add_edge(s,d) +            G[s][d]['weigth'] = 1 + +for s,d in G.edges(): +    print s,d, G[s][d]['weigth'] +         + diff --git a/structure/metrics/avg_edge_overlap.py b/structure/metrics/avg_edge_overlap.py new file mode 100644 index 0000000..e68fd8b --- /dev/null +++ b/structure/metrics/avg_edge_overlap.py @@ -0,0 +1,47 @@ +#### +## +## Compute the average edge overlap of a multiplex, i.e. 
the average +## number of layers in which an edge is present +## +## + +import sys + + +if len(sys.argv) < 2: +    print "Usage: %s <layer1> [<layer2>...]" % sys.argv[0] +    sys.exit(1) + +max_N = -1 + +all_edges = {} + +layer_ID = -1 + +for layer in sys.argv[1:]: +    layer_ID += 1 +    with open(layer, "r") as lines: +        for l in lines: +            if l[0] == "#": +                continue +            s, d = [int(x) for x in l.strip(" \n").split(" ")[:2]] +            if s > d: +                tmp = s +                s = d +                d = tmp +            if (s,d) in all_edges: +                all_edges[(s,d)].append(layer_ID) +            else: +                all_edges[(s,d)] = [layer_ID] +                 +K = len(all_edges.keys()) +M = len(sys.argv) - 1 + +numer = 0 + +for k in all_edges.keys(): +    numer += len(set(all_edges[(k)])) + + +print 1.0 * numer / K, 1.0 * numer / (K * M) + diff --git a/structure/metrics/cartography_from_columns.py b/structure/metrics/cartography_from_columns.py new file mode 100644 index 0000000..a2343ed --- /dev/null +++ b/structure/metrics/cartography_from_columns.py @@ -0,0 +1,44 @@ +#### +## +## Make a cartography (i.e., sum and participation coefficient) based +## on the values found at the given column numbers of the file given +## as input, e.g.: +## +##    cartography_cols.py FILEIN 1 5 8 14 +## +## makes the assumption that the system has 4 layers, and that the +## quantities involved in the cartography are at the 2nd, 6th, 9th and +## 15th column of FILEIN +##  +## +## + +import sys + +if len(sys.argv) < 4: +    print "Usage: %s <filein> <col1> <col2> [<col3> <col4>...]" % sys.argv[0] +    sys.exit(1) + +filein=sys.argv[1] +cols = [int(x) for x in sys.argv[2:]] + +M = len(cols) + +with open(filein,"r") as f: +    for l in f: +        if l[0] == "#": +            continue +        elems = [float(x) if "e" in x or "." 
in x else int(x) for x in l.strip(" \n").split(" ")] +        sum_elems = 0 +        part = 0 +        for c in cols: +            val = elems[c] +            sum_elems += val +            part += val*val +        if sum_elems > 0: +            part = M * 1.0 / (M -1) * (1 - part * 1.0 / (sum_elems * sum_elems)) +        else: +            part = 0.0 +        print elems[0], sum_elems, part + +         diff --git a/structure/metrics/cartography_from_deg_vectors.py b/structure/metrics/cartography_from_deg_vectors.py new file mode 100644 index 0000000..c40d701 --- /dev/null +++ b/structure/metrics/cartography_from_deg_vectors.py @@ -0,0 +1,37 @@ +#### +## +## Take as input a file containing, on each line, the degree vector of +## a node of the multiplex, and compute the multiplex cartography +## diagram +## +## + +import sys + +if len(sys.argv) < 2: +    print "Usage: %s <node_deg_vectors>" % sys.argv[0] +    sys.exit(1) + +filein=sys.argv[1] + +M = -1 + +with open(filein,"r") as lines: +    for l in lines: +        if l[0] == "#": +            continue +        elems = [int(x) for x in l.strip(" \n").split(" ")] +        if (M == -1): +            M = len(elems) +        sum_elems = 0 +        part = 0 +        for val in elems: +            sum_elems += val +            part += val*val +        if sum_elems > 0: +            part = M * 1.0 / (M -1) * (1 - part * 1.0 / (sum_elems * sum_elems)) +        else: +            part = 0.0 +        print sum_elems, part + +         diff --git a/structure/metrics/cartography_from_layers.py b/structure/metrics/cartography_from_layers.py new file mode 100644 index 0000000..b9c4584 --- /dev/null +++ b/structure/metrics/cartography_from_layers.py @@ -0,0 +1,54 @@ +#### +## +## Compute the participation coefficient of each node of a multiplex +##  +## + +import sys +import networkx as net +import collections +from compiler.ast import flatten + + + +if len(sys.argv) < 3: +    print "Usage: %s <layer1> <layer2> [<layer3>...]" % sys.argv[0] +    sys.exit(1) + + +layers = [] + +for f in sys.argv[1:]: +    G = net.Graph(net.read_edgelist(f, nodetype=int)) +    layers.append(G) + +nodes = flatten([x for j in layers for x in j.nodes()]) +#nodes.sort() +nodes = set(nodes) + +M = len(layers) + +#print nodes + +for n in nodes: +    deg_alpha_square = 0 +    deg = 0 +    col = 0 +    print n,  +    for l in layers: +        val = l.degree([n]) +        if not val: +            col = 2* col +            continue +        col *= 2 +        col += 1 +        val = val[n] +        deg += val +        deg_alpha_square += val*val +    if deg > 0: +        print  deg, 1.0 * M / (M-1) * (1.0 - 1.0 * deg_alpha_square/(deg * deg)) , col +    else: +        print  0 , 0, col + +     +     diff --git a/structure/metrics/edge_overlap.py b/structure/metrics/edge_overlap.py new file mode 100644 index 0000000..1b27f55 --- /dev/null +++ b/structure/metrics/edge_overlap.py @@ -0,0 +1,43 @@ +#### +## +## Compute the edge overlap of each edge of the multiplex, i.e. 
the +## number of times that each edge appears in the multiplex +## +## + +import sys + + +if len(sys.argv) < 2: +    print "Usage: %s <layer1> [<layer2>...]" % sys.argv[0] +    sys.exit(1) + +max_N = -1 + +all_edges = {} + +layer_ID = -1 + +for layer in sys.argv[1:]: +    layer_ID += 1 +    with open(layer, "r") as lines: +        for l in lines: +            if l[0] == "#": +                continue +            s, d = [int(x) for x in l.strip(" \n").split(" ")[:2]] +            if s > d: +                tmp = s +                s = d +                d = tmp +            if (s,d) in all_edges: +                all_edges[(s,d)].append(layer_ID) +            else: +                all_edges[(s,d)] = [layer_ID] +                 +K = len(all_edges.keys()) + +for k in all_edges.keys(): +    val = len(set(all_edges[(k)])) +    s,d = k +    print s, d, val + diff --git a/structure/metrics/intersect_layers.py b/structure/metrics/intersect_layers.py new file mode 100644 index 0000000..a9da00c --- /dev/null +++ b/structure/metrics/intersect_layers.py @@ -0,0 +1,47 @@ +#### +## +## Take a certain number of networks as input and give as output the +## corresponding intersection graph, i.e. the graph in which an edge +## exists between i and j if and only if it exists in ALL the graphs +## given as input +## + + +import sys + + +if len(sys.argv)< 3: +    print "Usage: %s <file1> <file2> [<file3>....]" % sys.argv[0] +    sys.exit(1) + + +graphs = {} + +num = 0 + +for fname in sys.argv[1:]: + +    lines = open(fname).readlines() +    graphs[num] = [] +    for l in lines: +        s,d = [int(x) for x in l.strip(" \n").split(" ")][:2] +        if s > d: +            graphs[num].append((d,s)) +        else: +            graphs[num].append((s,d)) +    num += 1 + +#print graphs + + +for edge in graphs[0]: +    to_print = True +    for i in range(1, num): +        if edge not in graphs[i]: +            to_print = False +            break +    if to_print: +        s,d = edge +        print s,d + + diff --git a/structure/metrics/overlap_degree.py b/structure/metrics/overlap_degree.py new file mode 100644 index 0000000..fa69434 --- /dev/null +++ b/structure/metrics/overlap_degree.py @@ -0,0 +1,47 @@ +#### +## +## Compute the overlapping degree for each node and the corresponding +## z-score +## +## + +import sys +import numpy + + +if len(sys.argv) < 2: +    print "Usage: %s <layer1> <layer2> [<layer3>...]" % sys.argv[0] +    sys.exit(1) + + +nodes = {} + +for f in sys.argv[1:]: + +    lines = open(f).readlines() + +    for l in lines: +        if l[0] == "#": +            continue +        s,d = [int(x) for x in l.strip(" \n").split(" ")[:2]] +        if nodes.has_key(s): +            nodes[s] +=1 +        else: +            nodes[s] = 1 +        if nodes.has_key(d): +            nodes[d] +=1 +        else: +            nodes[d] = 1 + + +degrees = nodes.values() +avg_deg = numpy.mean(degrees) +std_deg = numpy.std(degrees) + +#print avg_deg, std_deg + +keys = nodes.keys() +keys.sort() + +for n in keys: +    print n, nodes[n], (nodes[n] - avg_deg)/std_deg diff --git a/structure/reinforcement/reinforcement.py b/structure/reinforcement/reinforcement.py new file mode 100644 index 0000000..5939e59 --- /dev/null +++ b/structure/reinforcement/reinforcement.py @@ -0,0 +1,61 @@ +import networkx as nx +import sys + + +if __name__ == "__main__": + +	 +	if len(sys.argv) < 6: +	    print "Usage: %s <layer1> <layer2> <N bins> <min_value> <max_value>" % sys.argv[0] +	    sys.exit(1) + + +	filename_a = sys.argv[1] +	filename_b = sys.argv[2] +	 +	intervals=int(sys.argv[3]) +	minvalue=float(sys.argv[4]) +	maxvalue=float(sys.argv[5]) +	 +	 +	tot_a = [] +	pos_a = [] +	for t in range (intervals): +		tot_a.append(0) +		pos_a.append(0) + +	 +	fa=open(filename_a, 'r') +	Ga=nx.read_adjlist(fa) +	 +	 +	fb=open(filename_b, 'r') +	Gb=nx.read_weighted_edgelist(fb) +	 +	 +	for u,v in Gb.edges(): +		Gbw=Gb[u][v]['weight'] +		for i in range (intervals): +			a=minvalue+float(maxvalue-minvalue)*float(i)/intervals +			b=minvalue+float(maxvalue-minvalue)*float(i+1)/intervals +			if (Gbw>a and Gbw<b): +				tot_a[i]+=1 +				if (Ga.has_edge(u,v)==True): +					pos_a[i]+=1 +				break + +	freq_a=[] +	for i in range (intervals): +		if (tot_a[i]>0): +			freq_a.append(float(pos_a[i])/tot_a[i]) +		else: +			freq_a.append(0) +	print "#bin_minvalue bin_maxvalue frequency" +	for i in range (intervals): +		a=minvalue+float(maxvalue-minvalue)*float(i)/intervals +		b=minvalue+float(maxvalue-minvalue)*float(i+1)/intervals +		print a, b, freq_a[i]
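Taken together, the Usage strings above suggest a typical pipeline: build the per-node degree vectors from the layer edge lists, then feed them to the activity and overlap tools. A hypothetical session, with all file names purely illustrative (degs_to_binary.py writes the bit-strings to stdout and the bit-string distribution to stderr, as its header states):

    python structure/activity/node_degree_vectors.py layer1.txt layer2.txt > deg_vectors.txt
    python structure/activity/degs_to_activity_overlap.py deg_vectors.txt > overlap_vs_activity.txt
    python structure/activity/degs_to_binary.py deg_vectors.txt > act_bitstrings.txt 2> bitstring_distr.txt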
