diff options
| field | value | date |
|---|---|---|
| author | KatolaZ <katolaz@yahoo.it> | 2015-10-19 16:23:00 +0100 |
| committer | KatolaZ <katolaz@yahoo.it> | 2015-10-19 16:23:00 +0100 |
| commit | df8386f75b0538075d72d52693836bb8878f505b (patch) | |
| tree | 704c2a0836f8b9fd9f470c12b6ae05637c431468 /structure | |
| parent | 363274e79eade464247089c105260bc34940da07 (diff) | |
First commit of MAMMULT code
Diffstat (limited to 'structure')
33 files changed, 2267 insertions, 0 deletions
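All of the scripts introduced by this commit consume plain-text edge lists, one file per layer: lines beginning with `#` are comments, and only the first two space-separated fields of each line are used, as integer node IDs numbered sequentially from 0 with no gaps. As a minimal sketch of that shared parsing convention (the file name `layer1.txt` is only an illustrative example, not part of the commit):

    ## Read one edge-list layer: skip comment lines and keep the first
    ## two integer fields of each line (extra fields, e.g. weights,
    ## are ignored, as in the scripts below). "layer1.txt" is a
    ## hypothetical example file.
    with open("layer1.txt", "r") as lines:
        for l in lines:
            if l[0] == "#":
                continue
            s, d = [int(x) for x in l.strip(" \n").split(" ")[:2]]
            print s, d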
diff --git a/structure/activity/degs_to_activity_overlap.py b/structure/activity/degs_to_activity_overlap.py new file mode 100644 index 0000000..c96959b --- /dev/null +++ b/structure/activity/degs_to_activity_overlap.py @@ -0,0 +1,36 @@ +#### +## +## Take a file which contains, on the n-th line, the degrees at each +## layer of the n-th node, and print on output the corresponding node +## multi-activity (i.e., the number of layers in which the node is +## active) and the overlapping degree (i.e., the total number of edges +## incident on the node) +## +## + + +import sys + +def to_binary(l): +    s = 0 +    e = 0 +    for v in l: +        s += v * pow(2,e) +        e += 1 +    return s + +if len(sys.argv) < 2: +    print "Usage: %s <filein>" % sys.argv[0] +    sys.exit(1) + +distr = {} + +with open(sys.argv[1]) as f: +    for l in f: +        elems = [int(x) for x in l.strip(" \n").split(" ")] +        ov = sum(elems) +        new_list = [1 if x>0 else 0 for x in elems] +        multi_act = sum(new_list) +        if multi_act and ov: +            print ov, multi_act +     diff --git a/structure/activity/degs_to_binary.py b/structure/activity/degs_to_binary.py new file mode 100644 index 0000000..cb7aef5 --- /dev/null +++ b/structure/activity/degs_to_binary.py @@ -0,0 +1,45 @@ +#### +## +## Take a file which contains, on the n-th line, the degrees at each +## layer of the n-th node, and print on output the corresponding node +## participation bit-strings, i.e. the string which contains "1" if on +## that layer the node is connected, and zero otherwise +## +## on the stderr, we also dump the distribution of each bit-string +## + + +import sys + +def to_binary(l): +    s = 0 +    e = 0 +    for v in l: +        s += v * pow(2,e) +        e += 1 +    return s + +if len(sys.argv) < 2: +    print "Usage: %s <filein>" % sys.argv[0] +    sys.exit(1) + +distr = {} + +with open(sys.argv[1]) as f: +    for l in f: +        elems = [int(x) for x in l.strip(" \n").split(" ")] +        new_list = [1 if x>0 else 0 for x in elems] +        val = to_binary(new_list) +        if val in distr: +            distr[val] += 1 +        else: +            distr[val] = 1 +        for i in new_list: +            print i, +        print + +for k in distr: +    bin_list = bin(k) +    bin_num = sum([int(x) if x=='1' else 0 for x in bin_list[2:]]) +    sys.stderr.write("%d %28s %d \n" % (bin_num, bin_list[2:], distr[k])) +     diff --git a/structure/activity/hamming_dist.py b/structure/activity/hamming_dist.py new file mode 100644 index 0000000..2a98e64 --- /dev/null +++ b/structure/activity/hamming_dist.py @@ -0,0 +1,41 @@ +#### +## +## Take as input the layers of a multiplex, and print, for each pair +## of layers, the normalized Hamming distance between the sets of +## nodes which are active on the two layers.
+## +## + + +import sys + +if len(sys.argv) < 3: +    print "Usage: %s <layer1> <layer2> [<layer3>...]" % sys.argv[0] +    sys.exit(1) + +max_N = -1 + +layers = [] + + +for layer in sys.argv[1:]: +    active = [] +    with open(layer, "r") as lines: +        for l in lines: +            if l[0] == "#": +                continue +             +            s, d = [int(x) for x in l.strip(" \n").split(" ")[:2]] +            if s > max_N: +                max_N = s +            if d > max_N: +                max_N = d +            active.extend([s,d]) +        active = set(active) +        layers.append(active) + +for i in range(len(layers)): +    for j in range(i+1, len(layers)): +        s = layers[i] ^ layers[j] +        print i, j, len(s)*1.0 / min(len(layers[i]) + len(layers[j]), max_N+1) + diff --git a/structure/activity/layer_activity.py b/structure/activity/layer_activity.py new file mode 100644 index 0000000..bea0416 --- /dev/null +++ b/structure/activity/layer_activity.py @@ -0,0 +1,27 @@ +#### +## +## Take as input the layers of a multiplex, and provide as output a +## file where the n-th line contains the activity of the n-th +## layer. +## +## + + +import sys + +if len(sys.argv) < 2: +    print "Usage: %s <layer1> [<layer2>...]" % sys.argv[0] +    sys.exit(1) + + +for layer in sys.argv[1:]: +    active = [] +    with open(layer, "r") as lines: +        for l in lines: +            if l[0] == "#": +                continue +             +            s, d = [int(x) for x in l.strip(" \n").split(" ")[:2]] +            active.extend([s,d]) +        active = set(active) +        print len(active) diff --git a/structure/activity/layer_activity_vectors.py b/structure/activity/layer_activity_vectors.py new file mode 100644 index 0000000..f32418d --- /dev/null +++ b/structure/activity/layer_activity_vectors.py @@ -0,0 +1,44 @@ +#### +## +## Take as input the layers of a multiplex, and provide as output a +## file where the n-th line contains the activity vector of the n-th +## layer, i.e. one 0/1 entry for each node. +## +## + + +import sys + +if len(sys.argv) < 2: +    print "Usage: %s <layer1> [<layer2>...]" % sys.argv[0] +    sys.exit(1) + +max_N = -1 + +layers = [] + + +for layer in sys.argv[1:]: +    active = [] +    with open(layer, "r") as lines: +        for l in lines: +            if l[0] == "#": +                continue +             +            s, d = [int(x) for x in l.strip(" \n").split(" ")[:2]] +            if s > max_N: +                max_N = s +            if d > max_N: +                max_N = d +            active.extend([s,d]) +        active = set(active) +        layers.append(active) + +for l in layers: +    for n in range(max_N+1): +        if n in l: +            print 1, +        else: +            print 0, +    print + diff --git a/structure/activity/multiplexity.py b/structure/activity/multiplexity.py new file mode 100644 index 0000000..9e038c9 --- /dev/null +++ b/structure/activity/multiplexity.py @@ -0,0 +1,41 @@ +#### +## +## Take as input the layers of a multiplex, and print, for each pair +## of layers, the pairwise multiplexity, i.e. the fraction of nodes +## which are active on both layers.
+## +## + + +import sys + +if len(sys.argv) < 3: +    print "Usage: %s <layer1> <layer2> [<layer3>...]" % sys.argv[0] +    sys.exit(1) + +max_N = -1 + +layers = [] + + +for layer in sys.argv[1:]: +    active = [] +    with open(layer, "r") as lines: +        for l in lines: +            if l[0] == "#": +                continue +             +            s, d = [int(x) for x in l.strip(" \n").split(" ")[:2]] +            if s > max_N: +                max_N = s +            if d > max_N: +                max_N = d +            active.extend([s,d]) +        active = set(active) +        layers.append(active) + +for i in range(len(layers)): +    for j in range(i+1, len(layers)): +        s = layers[i] & layers[j] +        print i, j, len(s)*1.0 / (max_N+1) + diff --git a/structure/activity/node_activity.py b/structure/activity/node_activity.py new file mode 100644 index 0000000..6410a68 --- /dev/null +++ b/structure/activity/node_activity.py @@ -0,0 +1,51 @@ +#### +## +## Take as input the layers of a multiplex, and provide as output a +## file where the n-th line contains the activity of the n-th node. We +## assume that nodes are numbered sequentially, starting from 0, with +## no gaps (i.e., missing nodes are treated as isolated nodes) +## +## +## + + +import sys + +if len(sys.argv) < 2: +    print "Usage: %s <layer1> [<layer2>...]" % sys.argv[0] +    sys.exit(1) + +node_activity = {} + +max_N = -1 + +for layer in sys.argv[1:]: +    active = [] +    with open(layer, "r") as lines: +        for l in lines: +            if l[0] == "#": +                continue +             +            s, d = [int(x) for x in l.strip(" \n").split(" ")[:2]] +            active.extend([s,d]) +            if s > max_N: +                max_N = s +            if d > max_N: +                max_N = d +        active = set(active) +        for n in active: +            if n in node_activity: +                node_activity[n] += 1 +            else: +                node_activity[n] = 1 +     + +for n in range(max_N+1): +    if n in node_activity: +        print node_activity[n] +    else: +        print 0 + + + + diff --git a/structure/activity/node_activity_vectors.py b/structure/activity/node_activity_vectors.py new file mode 100644 index 0000000..9839334 --- /dev/null +++ b/structure/activity/node_activity_vectors.py @@ -0,0 +1,68 @@ +#### +## +## Take as input the layers of a multiplex, and provide as output a +## file where the n-th line contains the activity (0/1) of the n-th +## node at each layer, separated by a space, in the format: +## +## node1_actlay1 node1_actlay2 .... node1_actlayM +## node2_actlay1 node2_actlay2 .... node2_actlayM +## .............................................. +## nodeN_actlay1 nodeN_actlay2 .... nodeN_actlayM +## +## + +import sys + +if len(sys.argv) < 2: +    print "Usage: %s <layer1> [<layer2>...]" % sys.argv[0] +    sys.exit(1) + +node_degrees = {} + +max_N = -1 + +num_layer = 0 + +for layer in sys.argv[1:]: +    with open(layer, "r") as lines: +        for l in lines: +            if l[0] == "#": +                continue +             +            s, d = [int(x) for x in l.strip(" \n").split(" ")[:2]] + +            if s > max_N: +                max_N = s +            if d > max_N: +                max_N = d + +            if s in node_degrees: +                if num_layer in node_degrees[s]: +                    node_degrees[s][num_layer] += 1 +                else: +                    node_degrees[s][num_layer] = 1 +            else: +                node_degrees[s] = {} +                node_degrees[s][num_layer] = 1 + +            if d in node_degrees: +                if num_layer in node_degrees[d]: +                    node_degrees[d][num_layer] += 1 +                else: +                    node_degrees[d][num_layer] = 1 +            else: +                node_degrees[d] = {} +                node_degrees[d][num_layer] = 1 +    num_layer += 1 + + +for n in range(max_N+1): +    for i in range(num_layer): +        if n in node_degrees: +            if i in node_degrees[n]: +                print 1, +            else: +                print 0, +        else: +            print 0, +    print diff --git a/structure/activity/node_degree_vectors.py b/structure/activity/node_degree_vectors.py new file mode 100644 index 0000000..8863daa --- /dev/null +++ b/structure/activity/node_degree_vectors.py @@ -0,0 +1,68 @@ +#### +## +## Take as input the layers of a multiplex, and provide as output a +## file where the n-th line contains the degrees of the n-th node at +## each layer, separated by a space, in the format: +## +## node1_deglay1 node1_deglay2 .... node1_deglayM +## node2_deglay1 node2_deglay2 .... node2_deglayM +## .............................................. +## nodeN_deglay1 nodeN_deglay2 .... 
nodeN_deglayM +## +## + +import sys + +if len(sys.argv) < 2: +    print "Usage: %s <layer1> [<layer2>...]" % sys.argv[0] +    sys.exit(1) + +node_degrees = {} + +max_N = -1 + +num_layer = 0 + +for layer in sys.argv[1:]: +    with open(layer, "r") as lines: +        for l in lines: +            if l[0] == "#": +                continue +             +            s, d = [int(x) for x in l.strip(" \n").split(" ")[:2]] + +            if s > max_N: +                max_N = s +            if d > max_N: +                max_N = d + +            if s in node_degrees: +                if num_layer in node_degrees[s]: +                    node_degrees[s][num_layer] += 1 +                else: +                    node_degrees[s][num_layer] = 1 +            else: +                node_degrees[s] = {} +                node_degrees[s][num_layer] = 1 + +            if d in node_degrees: +                if num_layer in node_degrees[d]: +                    node_degrees[d][num_layer] += 1 +                else: +                    node_degrees[d][num_layer] = 1 +            else: +                node_degrees[d] = {} +                node_degrees[d][num_layer] = 1 +    num_layer += 1 + + +for n in range(max_N+1): +    for i in range(num_layer): +        if n in node_degrees: +            if i in node_degrees[n]: +                print node_degrees[n][i], +            else: +                print 0, +        else: +            print 0, +    print diff --git a/structure/correlations/Makefile b/structure/correlations/Makefile new file mode 100644 index 0000000..58301cb --- /dev/null +++ b/structure/correlations/Makefile @@ -0,0 +1,15 @@ +CFLAGS="-O3" +CC="gcc" +GSL_FLAGS=-lgsl -lgslcblas -lm +MFLAG=-lm + +all: dump_k_q fit_knn + +fit_knn: fit_knn.c fit_utils.c +	$(CC) $(CFLAGS) -o fit_knn fit_knn.c fit_utils.c $(GSL_FLAGS) + +dump_k_q: dump_k_q.c rank_utils.c +	$(CC) $(CFLAGS) -o dump_k_q dump_k_q.c rank_utils.c $(MFLAG)  + +clean: +	rm dump_k_q fit_knn diff --git a/structure/correlations/compute_pearson.py b/structure/correlations/compute_pearson.py new file mode 100644 index 0000000..e2c9055 --- /dev/null +++ b/structure/correlations/compute_pearson.py @@ -0,0 +1,33 @@ +#### +## +## Compute the pearson correlation coefficient between the values of +## node properties included in the two files provided as input. +## + +import sys +import numpy +import scipy.stats +import math + +if len(sys.argv) < 3: +    print "Usage %s <file1> <file2>" % sys.argv[0] +    sys.exit(1) + + +x1 = [] + +with open(sys.argv[1], "r") as lines: +    for l in lines: +        elem = [float(x) if "." in x or "e" in x else int(x) for x in l.strip(" \n").split()][0] +        x1.append(elem) + +x2 = [] + +with open(sys.argv[2], "r") as lines: +    for l in lines: +        elem = [float(x) if "." 
in x or "e" in x else int(x) for x in l.strip(" \n").split()][0] +        x2.append(elem) + +     +r2 =numpy.corrcoef(x1,x2) +print r2[0][1] diff --git a/structure/correlations/compute_rho.py b/structure/correlations/compute_rho.py new file mode 100644 index 0000000..8fa4174 --- /dev/null +++ b/structure/correlations/compute_rho.py @@ -0,0 +1,118 @@ +#### +## +## Get two rankings and a pairing, and compute the corresponding +## Spearman's rho rank correlation coefficient +## +## +## +## + +import sys +import random +import math +#import scipy.stats + +## +## Compute the constant C of the Spearman's rho correlation coefficient in the draft +##  +def compute_C(rank1, rank2): +    N = len(rank1) +    [mu1, mu2] = [1.0 * sum(x.values()) / len(x.values()) for x in [rank1, rank2]] +    [sum1, sum2] = [1.0 * sum(x.values()) for x in [rank1, rank2]] +    C = N * mu1 * mu2 - mu2 * sum1 - mu1 * sum2 + +    #print mu1, mu2, sum1, sum2, C + +    return C + + +## +## Compute the constant D of the Spearman's rho correlation coefficient in the draft +##  +def compute_D(rank1, rank2): +    [mu1, mu2] = [1.0 * sum(x.values()) / len(x.values()) for x in [rank1, rank2]] +    s1 = sum([math.pow((x-mu1), 2) for x in rank1.values()]) +    s2 = sum([math.pow((x-mu2), 2) for x in rank2.values()]) +     +    D = math.sqrt(s1 * s2) +    return D + +def compute_rho(rank1, rank2, pairing, C, D): + +    rho = 0 +    for s,t in pairing: +        rho += rank1[s] * rank2[t] +    rho = (rho + C) / D +    return rho + +if len(sys.argv) < 3: +    print "Usage: %s <rank1> <rank2> [<pairing>]" % sys.argv[0] +    sys.exit(1) + +rank1 = {} +rank2 = {} + +lines = open(sys.argv[1], "r").readlines() + +i = 0 +for l in lines: +    if l[0] == "#" or l.strip(" \n").split(" ") == []: ## Strip comments and empty lines +        continue +    r = [float(x) if "." in x or "e" in x else int(x) for x in l.strip(" \n").split(" ")][0] +    rank1[i] = r +    i += 1 + + +lines = open(sys.argv[2], "r").readlines() + +i = 0 +for l in lines: +    if l[0] == "#" or l.strip(" \n").split(" ") == []: ## Strip comments and empty lines +        continue +    r = [float(x) if "." in x or "e" in x else int(x) for x in l.strip(" \n").split(" ")][0] +    rank2[i] = r +    i += 1 + + +N1 = len(rank1) +N2 = len(rank2) + +if (N1 != N2): +    print "The two files must have the same number of nodes!!!!!" +    sys.exit(2) + + + +C = compute_C(rank1, rank2) +D = compute_D(rank1, rank2) + + +## We start from a random pairing, and compute the corresponding value of rho + +pairing = [] + +if len(sys.argv) > 3: +    lines = open(sys.argv[3], "r").readlines() +     +     + +    for l in lines: +        if l[0] == "#" or l.strip(" \n").split(" ") == []: +            continue +        p1, p2 = [int(x) for x in l.strip(" \n").split(" ")][:2] +        pairing.append((p1, p2)) + +else: +    for i in range (N1): +        pairing.append((i,i)) + + +if len(pairing) != N1: +    print "ERROR !!! The pairing should be of the same length of the ranking files!!!" 
+    sys.exit(1) + +rho = compute_rho(rank1, rank2, pairing, C, D) + +print rho + + diff --git a/structure/correlations/compute_tau.py b/structure/correlations/compute_tau.py new file mode 100644 index 0000000..3d1f804 --- /dev/null +++ b/structure/correlations/compute_tau.py @@ -0,0 +1,133 @@ +#### +## +## Take as input two files, whose n^th line contains the ranking of +## element n, and compute the Kendall's \tau_b rank correlation +## coefficient +## +## + +import sys +from numpy import * + + +def kendalltau(x,y): +    initial_sort_with_lexsort = True # if True, ~30% slower (but faster under profiler!) but with better worst case (O(n log(n)) than (quick)sort (O(n^2)) +    n = len(x) +    temp = range(n) # support structure used by mergesort +    # this closure recursively sorts sections of perm[] by comparing  +    # elements of y[perm[]] using temp[] as support +    # returns the number of swaps required by an equivalent bubble sort +    def mergesort(offs, length): +        exchcnt = 0 +        if length == 1: +            return 0 +        if length == 2: +            if y[perm[offs]] <= y[perm[offs+1]]: +                return 0 +            t = perm[offs] +            perm[offs] = perm[offs+1] +            perm[offs+1] = t +            return 1 +        length0 = length / 2 +        length1 = length - length0 +        middle = offs + length0 +        exchcnt += mergesort(offs, length0) +        exchcnt += mergesort(middle, length1) +        if y[perm[middle - 1]] < y[perm[middle]]: +            return exchcnt +        # merging +        i = j = k = 0 +        while j < length0 or k < length1: +            if k >= length1 or (j < length0 and y[perm[offs + j]] <= y[perm[middle + k]]): +                temp[i] = perm[offs + j] +                d = i - j +                j += 1 +            else: +                temp[i] = perm[middle + k] +                d = (offs + i) - (middle + k) +                k += 1 +            if d > 0: +                exchcnt += d; +            i += 1 +        perm[offs:offs+length] = temp[0:length] +        return exchcnt +     +    # initial sort on values of x and, if tied, on values of y +    if initial_sort_with_lexsort: +        # sort implemented as mergesort, worst case: O(n log(n)) +        perm = lexsort((y, x)) +    else: +        # sort implemented as quicksort, 30% faster but with worst case: O(n^2) +        perm = range(n) +        perm.sort(lambda a,b: cmp(x[a],x[b]) or cmp(y[a],y[b])) +     +    # compute joint ties +    first = 0 +    t = 0 +    for i in xrange(1,n): +        if x[perm[first]] != x[perm[i]] or y[perm[first]] != y[perm[i]]: +            t += ((i - first) * (i - first - 1)) / 2 +            first = i +    t += ((n - first) * (n - first - 1)) / 2 +     +    # compute ties in x +    first = 0 +    u = 0 +    for i in xrange(1,n): +        if x[perm[first]] != x[perm[i]]: +            u += ((i - first) * (i - first - 1)) / 2 +            first = i +    u += ((n - first) * (n - first - 1)) / 2 +     +    # count exchanges  +    exchanges = mergesort(0, n) +    # compute ties in y after mergesort with counting +    first = 0 +    v = 0 +    for i in xrange(1,n): +        if y[perm[first]] != y[perm[i]]: +            v += ((i - first) * (i - first - 1)) / 2 +            first = i +    v += ((n - first) * (n - first - 1)) / 2 +     +    tot = (n * (n - 1)) / 2 +    if tot == u and tot == v: +        return 1    # Special case for all ties in both ranks +     +    tau = ((tot-(v+u-t)) - 2.0 * exchanges) / (sqrt(float(( tot - u )) 
* float( tot - v ))) +     +    # what follows reproduces ending of Gary Strangman's original stats.kendalltau() in SciPy +    svar = (4.0*n+10.0) / (9.0*n*(n-1)) +    z = tau / sqrt(svar) +    ##prob = erfc(abs(z)/1.4142136) +    ##return tau, prob +    return tau + +def main(): + +    if len(sys.argv) < 3: +        print "Usage: %s <file1> <file2>" % sys.argv[0] +        sys.exit(1) + +    x1 = [] +    x2= [] + +    lines = open(sys.argv[1]).readlines() + +    for l in lines: +        elem = [float(x) if "e" in x or "." in x else int(x) for x in l.strip(" \n").split()][0] +        x1.append(elem) + +    lines = open(sys.argv[2]).readlines() + +    for l in lines: +        elem = [float(x) if "e" in x or "." in x else int(x) for x in l.strip(" \n").split()][0] +        x2.append(elem) +     + +    tau = kendalltau(x1,x2) +    print tau + + +if  __name__ == "__main__": +    main() diff --git a/structure/correlations/dump_k_q.c b/structure/correlations/dump_k_q.c new file mode 100644 index 0000000..2b6ba79 --- /dev/null +++ b/structure/correlations/dump_k_q.c @@ -0,0 +1,50 @@ +/** + * + * Get the degree distributions of two layers and a pairing, and dump + * on output the pairs k-q  + * + * + */ + +#include <stdio.h> +#include <stdlib.h> + +void dump_k_q(double *R1, double *R2, int N, int *pairing){ + +  int i; + +  for (i=0; i<N; i++){ +    printf("%g %g\n", R1[i], R2[pairing[i]]); +  } +} + + +int main(int argc, char *argv[]){ + +  int N1, N2; +  double *R1 = NULL; +  double *R2 = NULL; +   +  int *pairing = NULL; + + +  if (argc < 4){ +    printf("Usage: %s <degs1> <degs2> <pairing>\n", argv[0]); +    exit(1); +  } + +  load_ranking(argv[1], &N1, &R1); +  load_ranking(argv[2], &N2, &R2); +   +  if (N1 != N2){ +    printf("Error!!!! The two files must have the same number of lines!!!! 
Exiting...\n"); +    exit(1); +  } +   +  pairing = malloc(N1 * sizeof(int)); +   +  load_pairing(&pairing, N1, argv[3]); +   +  dump_k_q(R1, R2, N1, pairing); + } diff --git a/structure/correlations/fit_knn.c b/structure/correlations/fit_knn.c new file mode 100644 index 0000000..64fc4c3 --- /dev/null +++ b/structure/correlations/fit_knn.c @@ -0,0 +1,60 @@ +/** + * + * Take a file which contains pairs in the form: + * + * k q + * + * compute qnn(k) and fit this function with a power-law + * + * a*x^{b} + * + * The fit is actually performed as a linear fit on the + * exponential-binned log-log distribution + * + * + */ + + +#include <stdio.h> +#include <stdlib.h> +#include <math.h> + +#include <gsl/gsl_fit.h> +#include "fit_utils.h" + + + +int main(int argc, char *argv[]){ + +  double *k, *q, *x, *y; +  int N, num; +  double alpha; + +  double c0, c1, cov00, cov01, cov11, sqsum; + +  k=q=x=y=NULL; + +  if (argc < 3){ +    printf("%s <filein> <alpha>\n", argv[0]); +    exit(1); +  } +   +  alpha=atof(argv[2]); +   +  fprintf(stderr,"Alpha: %g\n", alpha); +   +  load_sequence_col(argv[1], &k, &N, 0); +  load_sequence_col(argv[1], &q, &N, 1); +  //exit(1); +  exp_bin_avg(k, q, N, alpha, &x, &y, &num); +  //normalize_distr(x, y, num); +  //dump_distr(x, y, num); +  compact_distr(x, y, &num); +  //  dump_distr(x,y,num); +  map_vec(x, num, log); +  map_vec(y, num, log); +  //dump_distr(x,y,num); + +  gsl_fit_linear(x, 1, y, 1, num, &c0, &c1, &cov00, &cov01, &cov11, &sqsum); +  fprintf(stderr, "a: %g b: %g\n", exp(c0), c1); +} diff --git a/structure/correlations/fit_utils.c b/structure/correlations/fit_utils.c new file mode 100644 index 0000000..e9e3954 --- /dev/null +++ b/structure/correlations/fit_utils.c @@ -0,0 +1,383 @@ +/** + * + * Fit a given sequence with a power-law function + * + * a*x^{b} + * + * The fit is actually performed as a linear fit on the + * exponential-binned log-log distribution + * + * + */ + + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> + +#include <gsl/gsl_fit.h> + +/** + * + * Load a sequence from a file, which contains one element on each line + * + */ + +void load_sequence(char *fname, double **v, int *N){ + +  int size; +  char buff[256]; +   +  FILE *f; +   +  f = fopen(fname, "r"); +  if (!f){ +    fprintf(stderr, "Error opening file %s!!!! Exiting...\n", fname); +    exit(1); +  } +   +  *N =0; +  size = 10; +  if (*v == NULL) +    *v = malloc(size * sizeof(double)); +  else{ +    *v = realloc(*v, size * sizeof(double)); +  } +   +  while (fgets(buff, 255, f)){ +    (*v)[*N] = atof(buff); +    *N += 1; +    if (*N == size){ +      size += 10; +      *v = realloc(*v, size * sizeof(double)); +    } +  } +  *v = realloc(*v, (*N) * sizeof(double)); +  fclose(f); +} + + +/** + * + * Load a sequence, getting the "col"-th column of the input file + * + */ + +void load_sequence_col(char *fname, double **v, int *N, int col){ + +  int size, n; +  char buff[256]; +  char *ptr; + +  FILE *f; +   +  f = fopen(fname, "r"); +  if (!f){ +    fprintf(stderr, "Error opening file %s!!!! 
Exiting...\n", fname); +    exit(1); +  } +   +  *N =0; +  size = 10; +  if (*v == NULL) +    *v = malloc(size * sizeof(double)); +  else{ +    *v = realloc(*v, size * sizeof(double)); +  } +   +  while (fgets(buff, 255, f)){ +    ptr = strtok(buff, " "); +    if (col > 0){ +      n = 0; +      while (n<col){ +        ptr = strtok(NULL, " "); +        n += 1; +      } +    } +    (*v)[*N] = atof(ptr); +    *N += 1; + +    if (*N == size){ +      size += 10; +      *v = realloc(*v, size * sizeof(double)); +    } +  } +  *v = realloc(*v, (*N) * sizeof(double)); +  fclose(f); +} + + +/** + * + * Make the exponential binning of a distribution, with a giving + * exponent alpha. The value of y[i] is the number of elements of v + * whose value lies between x[i-1] and x[i]... + * + */ + +void exp_bin_cnt(double *v, int N, double alpha, double **x, double **y, int *num){ +   +  double min_v, max_v, val, last_val; +  int i, j, size, num_x; +  double last_size, new_size; +   +  min_v = max_v = v[0]; +   +  for (i=1; i<N; i ++){ +    if (v[i] > max_v) +      max_v = v[i]; +    else if (v[i] > 0 && v[i] < min_v) +      min_v = v[i]; +  } + +  size = 10; +  if (*x == NULL){ +    *x = malloc(size * sizeof(double)); +  } +  else{ +    *x = realloc(*x, size * sizeof(double)); +  } +   +  val = min_v; +  last_size = min_v; +  (*x)[0] = min_v; +  num_x = 1; + +  while(val < max_v){ +    new_size = last_size * alpha; +    val = last_size + new_size; +    last_size = new_size; +    last_val = val; +    (*x)[num_x] = val; +    num_x += 1; +    if (num_x == size){ +      size += 10; +      *x = realloc(*x, size * sizeof(double)); +    } +  } +   +  if (*y == NULL){ +    *y = malloc(num_x * sizeof(double)); +  } +  else{ +    *y = realloc(*y, num_x * sizeof(double)); +  } +  for (i=0; i < num_x; i++){ +    (*y)[i] = 0; +  } + +   + +  for(i=0; i <N; i ++){ +    j = 0; +    while(v[i] > (*x)[j]){ +      j ++; +    } +    (*y)[j] += 1; +  } +  *num = num_x; +} + +/** + * + * Make the exponential binning of a distribution, with a giving + * exponent alpha. The value of y[i] is the average of the values in + * the vector "w" whose corresponding v lies between x[i-1] and  x[i]... + * + */ + + +void exp_bin_avg(double *v, double *w, int N, double alpha, double **x, double **y, int *num){ +   +  double min_v, max_v, val, last_val; +  int i, j, size, num_x; +  double last_size, new_size; +  int *cnt; + + +  min_v = max_v = v[0]; +   +  for (i=1; i<N; i ++){ +    if (v[i] > max_v) +      max_v = v[i]; +    else if (v[i] > 0 && v[i] < min_v) +      min_v = v[i]; +  } + +  size = 10; +  if (*x == NULL){ +    *x = malloc(size * sizeof(double)); +  } +  else{ +    *x = realloc(*x, size * sizeof(double)); +  } +   +  val = min_v; +  last_size = min_v; +  (*x)[0] = min_v; +  num_x = 1; + +  while(val < max_v){ +    new_size = last_size * alpha; +    val = last_size + new_size; +    last_size = new_size; +    last_val = val; +    (*x)[num_x] = val; +    num_x += 1; +    if (num_x == size){ +      size += 10; +      *x = realloc(*x, size * sizeof(double)); +    } +  } +   +   +  cnt = malloc(num_x * sizeof(int)); +   +  if (*y == NULL){ +    *y = malloc(num_x * sizeof(double)); +  } +  else{ +    *y = realloc(*y, num_x * sizeof(double)); +  } +  for (i=0; i < num_x; i++){ +    (*y)[i] = 0; +    cnt[i] = 0; +  } + +  for(i=0; i <N; i ++){ +    j = 0; +    while(j < num_x && v[i] > (*x)[j]){ +      j ++; +    } +    if(j == num_x){ +      printf("Error!!!!! Trying to assing a non-existing bin!!! 
-- fit_utils.c:exp_bin_avg!!!\n"); +      exit(37); +    } +    (*y)[j] += w[i]; +    cnt[j] += 1; +  } +  *num = num_x; +   +  for(i=0; i< num_x; i++){ +    if (cnt[i] > 0){ +      (*y)[i] = (*y)[i] / cnt[i]; +    } +  } +  free(cnt); +} + + +/** + * + * Print a distribution on stdout + * + */ + +void dump_distr(double *x, double *y, int N){ +  int i; +   +  for(i=0; i<N; i ++){ +    printf("%g %g\n", x[i], y[i]); +  } +   +} + + +/** + * Compact a distribution, leaving only the pairs (x_i, y_i) for which + * y_i > 0 + * + */ + +void compact_distr(double *x, double *y, int *num){ +   +  int i, j; +   +  i = j = 0; +  while(j < *num){ +    while(j < *num && y[j] == 0){ +      j ++; +    } +    if (j==*num){ +      break; +    } +    x[i] = x[j]; +    y[i] = y[j]; +    i ++; +    j ++; +  } +  *num = i; +} + + +/** + * + * Apply the function "f" on all the elemnts of a vector, in-place + * + */ + +void map_vec(double *v, int N, double (*f)(double)){ +  int i; +   +  for (i=0; i<N; i ++){ +    v[i] = f(v[i]); +  } +} + + +/** + * + * Normalize a distribution, dividing each y[i] for the width of the + * corresponding bin (i.e., x[i] - x[i-1]) + * + */ +void normalize_distr(double *x, double *y, int num){ +   +  int i; +   +  for(i=1; i<num; i ++){ +    y[i] /= (x[i] - x[i-1]); +  } +} + +/** + * + * take two vectors (k and q) and a pairing, and compute the best + * power-law fit "a*k^{mu}" of qnn(k) + * + */ + +void fit_current_trend(double *k, double *q, int N, int *pairing, double *mu, double *a,  +                       double *corr){ + +  static int  *num; +  static double *q_pair, *x, *y; + +  int i; +  int fit_num; +  double c0, c1, cov00, cov01, cov11, sqsum; +   +  if (q_pair == NULL){ +    q_pair = malloc(N * sizeof(double)); +  } + +  for(i=0; i<N; i++){ +    q_pair[i] = q[pairing[i]]; +  } + +  exp_bin_avg(k, q_pair, N, 1.3, &x, &y, &fit_num); +  //normalize_distr(x,y, fit_num); +  compact_distr(x, y, &fit_num); +   +  map_vec(x, fit_num, log); +  map_vec(y, fit_num, log); +  gsl_fit_linear(x, 1, y, 1, fit_num, &c0, &c1, &cov00, &cov01, &cov11, &sqsum); + +  //fprintf(stderr,"cov00: %g cov01: %g cov11: %g\n", cov00, cov01, cov11); +  //fprintf(stderr, "corr: %g ", cov01 / sqrt(cov00 * cov11)); +  *a = c0; +  *mu = c1; +  *corr = cov01/sqrt(cov00 * cov11); +} + diff --git a/structure/correlations/fit_utils.h b/structure/correlations/fit_utils.h new file mode 100644 index 0000000..183f47c --- /dev/null +++ b/structure/correlations/fit_utils.h @@ -0,0 +1,25 @@ +#ifndef __FIT_UTILS_H__ +#define __FIT_UTILS_H__ + + +void load_sequence(char *fname, double **v, int *N); + +void load_sequence_col(char *fname, double **v, int *N, int col); + +void exp_bin_cnt(double *v, int N, double alpha, double **x, double **y, int *num); + +void exp_bin_avg(double *v, double *w, int N, double alpha, double **x, double **y, int *num); + +void dump_distr(double *x, double *y, int N); + +void compact_distr(double *x, double *y, int *num); + +void map_vec(double *v, int N, double (*f)(double)); + +void normalize_distr(double *x, double *y, int num); + +void fit_current_trend(double *k, double *q, int N, int *pairing,  +                       double *mu, double *a, double *corr); + + +#endif /* __FIT_UTILS_H__ */ diff --git a/structure/correlations/knn_q_from_degrees.py b/structure/correlations/knn_q_from_degrees.py new file mode 100644 index 0000000..5e10bfa --- /dev/null +++ b/structure/correlations/knn_q_from_degrees.py @@ -0,0 +1,49 @@ +#### +## +## +## Take a file with the degree sequence of nodes which 
are active on +## both layers, and compute \overline{k}(q) and \overline{q}(k), +## i.e. the two inter-layer correlation functions, where we call "k" +## the degree on the first column, and "q" the degree on the second +## column +## +## + +import sys + +if len(sys.argv) < 2: +    print "Usage: %s <filein>" % sys.argv[0] +    sys.exit(1) + +qnn_k = {} +knn_q = {} + +with open(sys.argv[1]) as f: +    for l in f: +        k, q = [int(x) for x in l.strip(" \n").split(" ")] +        if qnn_k.has_key(k): +            qnn_k[k].append(q) +        else: +            qnn_k[k] = [q] +        if knn_q.has_key(q): +            knn_q[q].append(k) +        else: +            knn_q[q] = [k] + + + + +keys= qnn_k.keys() +keys.sort() +for k in keys: +    print k, 1.0 * sum(qnn_k[k])/len(qnn_k[k]) + + + + +keys= knn_q.keys() +keys.sort() +for q in keys: +    sys.stderr.write("%d %f\n" % (q, 1.0 * sum(knn_q[q])/len(knn_q[q]))) + + diff --git a/structure/correlations/knn_q_from_layers.py b/structure/correlations/knn_q_from_layers.py new file mode 100644 index 0000000..c108c18 --- /dev/null +++ b/structure/correlations/knn_q_from_layers.py @@ -0,0 +1,98 @@ +#### +## +## +## Compute the degree-degree correlations of a multiplex graph, namely: +## +## <k_1>(k_2) and <k_2>(k_1) +## +## Takes as input the two lists of edges corresponding to each layer +## + +import sys +import numpy as np +import networkx as net + + +def knn(G, n): +    neigh = G.neighbors(n) +    l = G.degree(neigh).values() +    return 1.0 * sum(l) / len(l)  + + +if len(sys.argv) < 2: +    print "Usage: %s <layer1> <layer2>" % sys.argv[0] +    sys.exit(1) + + +G1 = net.read_edgelist(sys.argv[1]) +G2 = net.read_edgelist(sys.argv[2]) + +k1_k1 = {} ## Intraleyer knn (k1) +k2_k2 = {} ## Intralayer knn (k2) +k1_k2 = {} ## Interlayer average degree at layer 1 of a node having degree k_2 in layer 2 +k2_k1 = {} ## Interlayer average degree at layer 2 of a node having degree k_1 in layer 1 + + +for n in G1.nodes(): +    k1 = G1.degree(n) + +     +    ##print k1,k2 + +    knn1 = knn(G1, n) +    if n in G2.nodes(): +        k2 = G2.degree(n) +        knn2 = knn(G2, n) +    else: +        k2 = 0 +        knn2 = 0 + +    if k1_k1.has_key(k1): +        k1_k1[k1].append(knn1) +    else: +        k1_k1[k1] = [knn1] + +    if k2_k2.has_key(k2): +        k2_k2[k2].append(knn2) +    else: +        k2_k2[k2] = [knn2] +         + +    if k1_k2.has_key(k2): +        k1_k2[k2].append(k1) +    else: +        k1_k2[k2] = [k1] + +    if k2_k1.has_key(k1): +        k2_k1[k1].append(k2) +    else: +        k2_k1[k1] = [k2] +         +         +k1_keys = k1_k1.keys() +k1_keys.sort() +k2_keys = k2_k2.keys() +k2_keys.sort() + + +f = open("%s_%s_k1" % (sys.argv[1], sys.argv[2]), "w+") + +for n in k1_keys: +    avg_knn = np.mean(k1_k1[n]) +    std_knn = np.std(k1_k1[n]) +    avg_k2 = np.mean(k2_k1[n]) +    std_k2 = np.std(k2_k1[n]) +    f.write("%d %f %f %f %f\n" % (n, avg_knn, std_knn, avg_k2, std_k2)) + +f.close() + +f = open("%s_%s_k2" % (sys.argv[1], sys.argv[2]), "w+") + +for n in k2_keys: +    avg_knn = np.mean(k2_k2[n]) +    std_knn = np.std(k2_k2[n]) +    avg_k1 = np.mean(k1_k2[n]) +    std_k1 = np.std(k1_k2[n]) +    f.write("%d %f %f %f %f\n" % (n, avg_knn, std_knn, avg_k1, std_k1)) +f.close() + diff --git a/structure/correlations/rank_nodes.py b/structure/correlations/rank_nodes.py new file mode 100644 index 0000000..6985e88 --- /dev/null +++ b/structure/correlations/rank_nodes.py @@ -0,0 +1,74 @@ +#### +## +## +## Get a file as input, whose n-th line corresponds to 
the value of a +## certain property of node n, and rank nodes according to their +## properties, taking into account ranking ties properly. +## +## The output is a file whose n-th line is the "ranking" of the n-th +## node according to the given property. (notice that rankings could +## be fractional, due to the tie removal algorithm) +## +## + + +import sys +import math + + +if len(sys.argv) < 2: +    print "Usage: %s <filein>" % sys.argv[0] +    sys.exit(1) + + + +lines = open(sys.argv[1], "r").readlines() + +ranking = [] + +n=0 +for l in lines: +    if l[0] == "#" or l.strip(" \n").split(" ") == []: +        continue +    v = [float(x) if "." in x or "e" in x else int(x) for x in l.strip(" \n").split(" ")][0] +    ranking.append((v,n)) +    n +=1 + +ranking.sort(reverse=True) +#print ranking +new_ranking = {} + +v0, n0 = ranking[0] + +old_value = v0 +tot_rankings = 1 + +stack = [n0] +l=1.0 +for v,n in ranking[1:]: +    l += 1 +    ##print stack, tot_rankings +    if v != old_value: ### There is a new rank +        # We first compute the rank for all the nodes in the stack and then we set it +        new_rank_value = 1.0 * tot_rankings / len(stack) +        ##print new_rank_value +        for j in stack: +            new_ranking[j] = new_rank_value +        old_value = v +        tot_rankings = l +        stack = [n] +    else: # One more value with the same rank, keep it for the future +        stack.append(n) +        tot_rankings += l + +new_rank_value = 1.0 * tot_rankings / len(stack) +#print new_rank_value +for j in stack: +    new_ranking[j] = new_rank_value + +#print new_ranking + +keys = new_ranking.keys() +keys.sort() +for k in keys: +    print new_ranking[k] diff --git a/structure/correlations/rank_nodes_thresh.py b/structure/correlations/rank_nodes_thresh.py new file mode 100644 index 0000000..44b565a --- /dev/null +++ b/structure/correlations/rank_nodes_thresh.py @@ -0,0 +1,87 @@ +#### +## +## +## Get a file as input, whose n-th line corresponds to the value of a +## certain property of node n, and rank nodes according to their +## properties, taking into account ranking ties properly. +## +## The output is a file whose n-th line is the "ranking" of the n-th +## node according to the given property. (notice that rankings could +## be fractional, due to the tie removal algorithm) +## +## The rank of a node is set to "0" (ZERO) if the corresponding +## property is smaller than a value given as second parameter +## + + +import sys +import math + + +if len(sys.argv) < 3: +    print "Usage: %s <filein> <thresh>" % sys.argv[0] +    sys.exit(1) + + +thresh = float(sys.argv[2]) + +lines = open(sys.argv[1], "r").readlines() + +ranking = [] + +n=0 +for l in lines: +    if l[0] == "#" or l.strip(" \n").split(" ") == []: +        continue +    v = [float(x) if "." 
in x or "e" in x else int(x) for x in l.strip(" \n").split(" ")][0] +    if v >= thresh: +        ranking.append((v,n)) +    else: +        ranking.append((0,n)) +    n +=1 + +ranking.sort(reverse=True) +#print ranking +new_ranking = {} + +v0, n0 = ranking[0] + + +old_value = v0 +tot_rankings = 1 + +stack = [n0] +l=1.0 +for v,n in ranking[1:]: +    l += 1 +    ##print stack, tot_rankings +    if v != old_value: ### There is a new rank +        # We first compute the rank for all the nodes in the stack and then we set it +        if old_value == 0: +            new_rank_value = 0 +        else: +            new_rank_value = 1.0 * tot_rankings / len(stack) +        ##print new_rank_value +        for j in stack: +            new_ranking[j] = new_rank_value +        old_value = v +        tot_rankings = l +        stack = [n] +    else: # One more value with the same rank, keep it for the future +        stack.append(n) +        tot_rankings += l + +if v == 0 : +    new_rank_value = 0 +else: +    new_rank_value = 1.0 * tot_rankings / len(stack) +#print new_rank_value +for j in stack: +    new_ranking[j] = new_rank_value + +#print new_ranking + +keys = new_ranking.keys() +keys.sort() +for k in keys: +    print new_ranking[k] diff --git a/structure/correlations/rank_occurrence.py b/structure/correlations/rank_occurrence.py new file mode 100644 index 0000000..6339d5d --- /dev/null +++ b/structure/correlations/rank_occurrence.py @@ -0,0 +1,45 @@ +#### +## +## Get two rankings and compute the size of the k-intersection, +## i.e. the number of elements which are present in the first k +## positions of both rankings, as a function of k +## +## + + +import sys + + +if len(sys.argv)< 4: +    print "Usage: %s <file1> <file2> <increment>" % sys.argv[0] +    sys.exit(1) + +incr = int(sys.argv[3]) + +rank1 = [] +rank2 = [] + +lines = open(sys.argv[1], "r").readlines() + +for l in lines: +    n= l.strip(" \n").split(" ")[0] +    rank1.append(n) + +lines = open(sys.argv[2], "r").readlines() + +for l in lines: +    n= l.strip(" \n").split(" ")[0] +    rank2.append(n) + + +N = len(rank1) + +i = incr + +while i < N+incr: +    l = len(set(rank1[:i]) & set(rank2[:i])) +    print i, l +    i += incr + + + diff --git a/structure/correlations/rank_utils.c b/structure/correlations/rank_utils.c new file mode 100644 index 0000000..4b8ef41 --- /dev/null +++ b/structure/correlations/rank_utils.c @@ -0,0 +1,217 @@ +/** + * + * Some utility functions to be used with tune_rho, compute_rho and + * the like + * + */ + + +#include <stdio.h> +#include <stdlib.h> +#include <math.h> +#include <string.h> +#include <time.h> + +#include "rank_utils.h" + +void load_ranking(char *fname, int *N, double **R){ +   +  char buff[256]; +  int size = 10; +  FILE *f; +   +  if (*R == NULL){ +    *R = malloc(size * sizeof (double)); +  } +  f = fopen(fname, "r"); +  if (!f){ +    printf("Unable to open file: %s!!! 
Exiting\n", fname); +    exit(1); +  } +   +  *N = 0; +  +  while (fgets(buff, 255, f)){ +    if (*N == size){ +      size += 10; +      *R = realloc(*R, size * sizeof(double)); +    } +    (*R)[*N] = atof(buff); +    *N += 1; +  } +} + +double avg_array(double *v, int N){ +   +  double sum = 0.0; +  int i; +   +  for (i = 0; i < N; i ++){ +    sum += v[i]; +  } +  return sum/N; +} + +double compute_C(double *R1, double *R2, int N){ +  double mu1, mu2, sum1, sum2; + +  mu1 = avg_array(R1, N); +  mu2 = avg_array(R2, N); +   +  sum1 = mu1 * N; +  sum2 = mu2 * N; +   +  return N * mu1 * mu2 - mu2 * sum1 - mu1 * sum2; +} + + +double compute_D(double *R1, double *R2, int N){ +   +  double mu1, mu2, s1, s2; +  int i; + +  mu1 = avg_array(R1, N); +  mu2 = avg_array(R2, N); +   +  s1 = s2 = 0.0; +   +  for (i=0 ; i < N; i ++){ +    s1 += pow((R1[i] - mu1), 2); +    s2 += pow((R2[i] - mu2), 2); +  } +   +  return sqrt(s1 * s2); +} + + +double compute_rho(double *R1, double *R2, int N, int *pairing){ +   +  double rho = 0; +  int i; +   +  for (i=0; i < N; i ++){ +    rho += R1[i] * R2[ pairing[i] ]; +  } + +  rho = (rho + compute_C(R1, R2, N))/ compute_D(R1, R2, N); +  return rho; +} + +void dump_ranking(double *R, int N){ +  int i; + +  for (i=0; i < N; i ++){ +    printf("%d: %f\n", i, R[i] ); +  } +} + + +void init_pairing_natural(int *pairing, int N){ +  int i; + +  for (i = 0; i< N; i ++){ +    pairing[i] = i; +  } +} + +void init_pairing_inverse(int *pairing, int N){ +  int i; + +  for (i = 0; i< N; i ++){ +    pairing[i] = N-i-1; +  } +} + +void select_pairing(int *pairing, int N, int argc, char *argv[], int pos){ + +  if (argc < pos + 1 || !strncasecmp("rnd", argv[pos], 3)){ +    init_pairing_random(pairing, N); +  } +  else if (!strncasecmp("nat", argv[pos], 3)){ +    init_pairing_natural(pairing, N); +  } +  else if (!strncasecmp("inv", argv[pos], 3)){ +    init_pairing_inverse(pairing, N); +  } +  else{ +    printf ("Pairing strategy \"%s\" unknown!!! Exiting...\n", argv[pos]); +    exit(1); +  } + +} + + +void shuffle_sequence(int *s, int N){ +   +  int i, j, tmp; + +  for (i=N-1; i>=0; i--){ +    j = rand() % (i+1); +    tmp = s[j]; +    s[j] = s[i]; +    s[i] = tmp; +  } +} + + + +void init_pairing_random(int *pairing, int N){ + +  init_pairing_natural(pairing, N); +  shuffle_sequence(pairing, N); + +} + +/* Loads a pairing from a file, in the format: + * + * rank1 rank2 + * ........... + */ +void load_pairing(int **pairing, int N, char *fname){ +   +  FILE *f; +  int i, j, num; +  char buff[256]; +  char *ptr; + +  f = fopen(fname, "r"); +  if (!f){ +    printf("Error opening file \"%s\"!!!! 
Exiting....\n", fname); +    exit(2); +  } + +  if (*pairing == NULL){ +    *pairing = malloc(N * sizeof(int)); +    init_pairing_natural(*pairing, N); +  } +   +  num = 0; +  while(num < N){ +    fgets(buff, 255, f); +    if (buff[0] == '#') +      continue; +    ptr = strtok(buff, " "); /* read the first node */ +    i = atoi(ptr); +    ptr = strtok(NULL, " "); /* read the second node */ +    j = atoi(ptr); +    (*pairing)[i] = j; +    num += 1; +  } +} + + +void dump_pairing(int *pairing, int N){ +  int i; + +  for(i=0; i< N; i ++){ +    printf("%d %d\n", i, pairing[i]); +  } +} + +void copy_pairing(int *p1, int *p2, int N){ +  int i; +   +  for (i=0 ; i < N; i ++){ +    p2[i] = p1[i]; +  } +} diff --git a/structure/correlations/rank_utils.h b/structure/correlations/rank_utils.h new file mode 100644 index 0000000..521582a --- /dev/null +++ b/structure/correlations/rank_utils.h @@ -0,0 +1,44 @@ +#ifndef __RANK_UTILS_H__ +#define __RANK_UTILS_H__ + +/* Load a ranking */ +void load_ranking(char *fname, int *N, double **R); + +/* Compute the average of an array */ +double avg_array(double *v, int N); + +/* Compute the term "C" in the rho correlation coefficient */ +double compute_C(double *R1, double *R2, int N); + +/* Compute the term "D" in the rho correlation coefficient */ +double compute_D(double *R1, double *R2, int N); + +/* Compute the Spearman's rank correlation coefficient \rho */ +double compute_rho(double *R1, double *R2, int N, int *pairing); + +void dump_ranking(double *R, int N); + +void init_pairing_natural(int *pairing, int N); + +void init_pairing_inverse(int *pairing, int N); + +void init_pairing_random(int *pairing, int N); + +void select_pairing(int *pairing, int N, int argc, char *argv[], int pos); + +void shuffle_sequence(int *s, int N); + +void dump_pairing(int *pairing, int N); + +void copy_pairing(int *pairing1, int *pairing2, int N); + + +#endif /* __RANK_UTILS_H__ */ + + + + + + + + diff --git a/structure/metrics/aggregate_layers_w.py b/structure/metrics/aggregate_layers_w.py new file mode 100644 index 0000000..cd5ea1d --- /dev/null +++ b/structure/metrics/aggregate_layers_w.py @@ -0,0 +1,37 @@ +#### +##  +## Aggregate the layers of a multiplex, weigthing each edge according +## to the number of times it occurs in the different layers +## + +import sys +import networkx as net + + +if len(sys.argv) < 3: +    print "Usage: %s <file1> <file2> [<file3>....]" % sys.argv(0) +    sys.exit(1) + +G = net.Graph() + +lines = open(sys.argv[1], "r").readlines() + +for l in lines: +    s,d = [int(x) for x in l.strip(" \n").split(" ")[:2]] +    G.add_edge(s,d) +    G[s][d]['weigth'] = 1 + +for f in sys.argv[2:]: +    lines = open(f, "r").readlines() +    for l in lines: +        s,d = [int(x) for x in l.strip(" \n").split(" ")[:2]] +        if (G.has_edge(s,d)): +            G[s][d]['weigth'] += 1 +        else: +            G.add_edge(s,d) +            G[s][d]['weigth'] = 1 + +for s,d in G.edges(): +    print s,d, G[s][d]['weigth'] +         + diff --git a/structure/metrics/avg_edge_overlap.py b/structure/metrics/avg_edge_overlap.py new file mode 100644 index 0000000..e68fd8b --- /dev/null +++ b/structure/metrics/avg_edge_overlap.py @@ -0,0 +1,47 @@ +#### +## +## Compute the average edge overlap of a multiplex, i.e. 
the average +## number of layers in which an edge is present +## +## + +import sys + + +if len(sys.argv) < 2: +    print "Usage: %s <layer1> [<layer2>...]" % sys.argv[0] +    sys.exit(1) + +max_N = -1 + +all_edges = {} + +layer_ID = -1 + +for layer in sys.argv[1:]: +    layer_ID += 1 +    with open(layer, "r") as lines: +        for l in lines: +            if l[0] == "#": +                continue +            s, d = [int(x) for x in l.strip(" \n").split(" ")[:2]] +            if s > d: +                tmp = s +                s = d +                d = tmp +            if (s,d) in all_edges: +                all_edges[(s,d)].append(layer_ID) +            else: +                all_edges[(s,d)] = [layer_ID] +                 +K = len(all_edges.keys()) +M = len(sys.argv) - 1 + +numer = 0 + +for k in all_edges.keys(): +    numer += len(set(all_edges[(k)])) + + +print 1.0 * numer / K, 1.0 * numer / (K * M) + diff --git a/structure/metrics/cartography_from_columns.py b/structure/metrics/cartography_from_columns.py new file mode 100644 index 0000000..a2343ed --- /dev/null +++ b/structure/metrics/cartography_from_columns.py @@ -0,0 +1,44 @@ +#### +## +## Make a cartography (i.e., sum and participation coefficient) based +## on the values found at the given column numbers of the file given +## as input, e.g.: +## +##    cartography_cols.py FILEIN 1 5 8 14 +## +## makes the assumption that the system has 4 layers, and that the +## quantities involved in the cartography are at the 2nd, 6th, 9th and +## 15th column of FILEIN +##  +## +## + +import sys + +if len(sys.argv) < 4: +    print "Usage: %s <filein> <col1> <col2> [<col3> <col4>...]" % sys.argv[0] +    sys.exit(1) + +filein=sys.argv[1] +cols = [int(x) for x in sys.argv[2:]] + +M = len(cols) + +with open(filein,"r") as f: +    for l in f: +        if l[0] == "#": +            continue +        elems = [float(x) if "e" in x or "." 
in x else int(x) for x in l.strip(" \n").split(" ")] +        sum_elems = 0 +        part = 0 +        for c in cols: +            val = elems[c] +            sum_elems += val +            part += val*val +        if sum_elems > 0: +            part = M * 1.0 / (M -1) * (1 - part * 1.0 / (sum_elems * sum_elems)) +        else: +            part = 0.0 +        print elems[0], sum_elems, part + +         diff --git a/structure/metrics/cartography_from_deg_vectors.py b/structure/metrics/cartography_from_deg_vectors.py new file mode 100644 index 0000000..c40d701 --- /dev/null +++ b/structure/metrics/cartography_from_deg_vectors.py @@ -0,0 +1,37 @@ +#### +## +## Take as input a file containing, on each line, the degree vector of +## a node of the multiplex, and compute the multiplex cartography +## diagram +## +## + +import sys + +if len(sys.argv) < 2: +    print "Usage: %s <node_deg_vectors>" % sys.argv[0] +    sys.exit(1) + +filein=sys.argv[1] + +M = -1 + +with open(filein,"r") as lines: +    for l in lines: +        if l[0] == "#": +            continue +        elems = [int(x) for x in l.strip(" \n").split(" ")] +        if (M == -1): +            M = len(elems) +        sum_elems = 0 +        part = 0 +        for val in elems: +            sum_elems += val +            part += val*val +        if sum_elems > 0: +            part = M * 1.0 / (M -1) * (1 - part * 1.0 / (sum_elems * sum_elems)) +        else: +            part = 0.0 +        print sum_elems, part + +         diff --git a/structure/metrics/cartography_from_layers.py b/structure/metrics/cartography_from_layers.py new file mode 100644 index 0000000..b9c4584 --- /dev/null +++ b/structure/metrics/cartography_from_layers.py @@ -0,0 +1,54 @@ +#### +## +## Compute the participation coefficient of each node of a multiplex +##  +## + +import sys +import networkx as net +import collections +from compiler.ast import flatten + + + +if len(sys.argv) < 3: +    print "Usage: %s <layer1> <layer2> [<layer3>...]" % sys.argv[0] +    sys.exit(1) + + +layers = [] + +for f in sys.argv[1:]: +    G = net.Graph(net.read_edgelist(f, nodetype=int)) +    layers.append(G) + +nodes = flatten([x for j in layers for x in j.nodes()]) +#nodes.sort() +nodes = set(nodes) + +M = len(layers) + +#print nodes + +for n in nodes: +    deg_alpha_square = 0 +    deg = 0 +    col = 0 +    print n,  +    for l in layers: +        val = l.degree([n]) +        if not val: +            col = 2* col +            continue +        col *= 2 +        col += 1 +        val = val[n] +        deg += val +        deg_alpha_square += val*val +    if deg > 0: +        print  deg, 1.0 * M / (M-1) * (1.0 - 1.0 * deg_alpha_square/(deg * deg)) , col +    else: +        print  0 , 0, col + +     +     diff --git a/structure/metrics/edge_overlap.py b/structure/metrics/edge_overlap.py new file mode 100644 index 0000000..1b27f55 --- /dev/null +++ b/structure/metrics/edge_overlap.py @@ -0,0 +1,43 @@ +#### +## +## Compute the edge overlap of each edge of the multiplex, i.e. 
the +## number of times that each edge appears in the multiplex +## +## + +import sys + + +if len(sys.argv) < 2: +    print "Usage: %s <layer1> [<layer2>...]" % sys.argv[0] +    sys.exit(1) + +max_N = -1 + +all_edges = {} + +layer_ID = -1 + +for layer in sys.argv[1:]: +    layer_ID += 1 +    with open(layer, "r") as lines: +        for l in lines: +            if l[0] == "#": +                continue +            s, d = [int(x) for x in l.strip(" \n").split(" ")[:2]] +            if s > d: +                tmp = s +                s = d +                d = tmp +            if (s,d) in all_edges: +                all_edges[(s,d)].append(layer_ID) +            else: +                all_edges[(s,d)] = [layer_ID] +                 +K = len(all_edges.keys()) + +for k in all_edges.keys(): +    val = len(set(all_edges[(k)])) +    s,d = k +    print s, d, val + diff --git a/structure/metrics/intersect_layers.py b/structure/metrics/intersect_layers.py new file mode 100644 index 0000000..a9da00c --- /dev/null +++ b/structure/metrics/intersect_layers.py @@ -0,0 +1,47 @@ +#### +## +## Take a certain number of networks as input and give as output the +## corresponding intersection graph, i.e. the graph in which an edge +## exists between i and j if and only if it exists in ALL the graphs +## given as input +## + + +import sys + + +if len(sys.argv)< 3: +    print "Usage: %s <file1> <file2> [<file3>....]" % sys.argv[0] +    sys.exit(1) + + +graphs = {} + +num = 0 + +for fname in sys.argv[1:]: + +    lines = open(fname).readlines() +    graphs[num] = [] +    for l in lines: +        s,d = [int(x) for x in l.strip(" \n").split(" ")][:2] +        if s > d: +            graphs[num].append((d,s)) +        else: +            graphs[num].append((s,d)) +    num += 1 + +#print graphs + + +for edge in graphs[0]: +    to_print = True +    for i in range(1, num): +        if edge not in graphs[i]: +            to_print = False +            break +    if to_print: +        s,d = edge +        print s,d + + diff --git a/structure/metrics/overlap_degree.py b/structure/metrics/overlap_degree.py new file mode 100644 index 0000000..fa69434 --- /dev/null +++ b/structure/metrics/overlap_degree.py @@ -0,0 +1,47 @@ +#### +## +## Compute the overlapping degree for each node and the corresponding +## z-score +## +## + +import sys +import numpy + + +if len(sys.argv) < 2: +    print "Usage: %s <layer1> <layer2> [<layer3>...]" % sys.argv[0] +    sys.exit(1) + + +nodes = {} + +for f in sys.argv[1:]: + +    lines = open(f).readlines() + +    for l in lines: +        if l[0] == "#": +            continue +        s,d = [int(x) for x in l.strip(" \n").split(" ")[:2]] +        if nodes.has_key(s): +            nodes[s] +=1 +        else: +            nodes[s] = 1 +        if nodes.has_key(d): +            nodes[d] +=1 +        else: +            nodes[d] = 1 + + +degrees = nodes.values() +avg_deg = numpy.mean(degrees) +std_deg = numpy.std(degrees) + +#print avg_deg, std_deg + +keys = nodes.keys() +keys.sort() + +for n in keys: +    print n, nodes[n], (nodes[n] - avg_deg)/std_deg diff --git a/structure/reinforcement/reinforcement.py b/structure/reinforcement/reinforcement.py new file mode 100644 index 0000000..5939e59 --- /dev/null +++ b/structure/reinforcement/reinforcement.py @@ -0,0 +1,61 @@ +import networkx as nx +import sys + + +if __name__ == "__main__": + +	 +	if len(sys.argv) < 6: +	    print "Usage: %s <layer1> <layer2> <N bins> <min_value> <max_value>" % sys.argv[0] +	    sys.exit(1) + + +	filename_a = sys.argv[1] +	filename_b = sys.argv[2] +	 +	intervals=int(sys.argv[3]) +	minvalue=float(sys.argv[4]) +	maxvalue=float(sys.argv[5]) +	 +	 +	tot_a = [] +	pos_a = [] +	for t in range (intervals): +		tot_a.append(0) +		pos_a.append(0) + +	 +	fa=open(filename_a, 'r') +	Ga=nx.read_adjlist(fa) +	 +	 +	fb=open(filename_b, 'r') +	Gb=nx.read_weighted_edgelist(fb) +	 +	 +	for u,v in Gb.edges(): +		Gbw=Gb[u][v]['weight'] +		for i in range (intervals): +			a=minvalue+float(maxvalue-minvalue)*float(i)/intervals +			b=minvalue+float(maxvalue-minvalue)*float(i+1)/intervals +			if (Gbw>a and Gbw<b): +				tot_a[i]+=1 +				if (Ga.has_edge(u,v)==True): +					pos_a[i]+=1 +				break + +	freq_a=[] +	for i in range (intervals): +		if (tot_a[i]>0): +			freq_a.append(float(pos_a[i])/tot_a[i]) +		else: +			freq_a.append(0) +	print "#bin_minvalue bin_maxvalue frequency" +	for i in range (intervals): +		a=minvalue+float(maxvalue-minvalue)*float(i)/intervals +		b=minvalue+float(maxvalue-minvalue)*float(i+1)/intervals +		print a, b, freq_a[i]
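Taken together, the Usage strings above suggest a typical pipeline: build the per-node degree vectors from the layer edge lists, then feed them to the activity and overlap tools. A hypothetical session, with all file names purely illustrative (degs_to_binary.py writes the bit-strings to stdout and the bit-string distribution to stderr, as its header states):

    python structure/activity/node_degree_vectors.py layer1.txt layer2.txt > deg_vectors.txt
    python structure/activity/degs_to_activity_overlap.py deg_vectors.txt > overlap_vs_activity.txt
    python structure/activity/degs_to_binary.py deg_vectors.txt > act_bitstrings.txt 2> bitstring_distr.txt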
