diff options
| author | KatolaZ <katolaz@yahoo.it> | 2015-10-19 16:23:00 +0100 | 
|---|---|---|
| committer | KatolaZ <katolaz@yahoo.it> | 2015-10-19 16:23:00 +0100 | 
| commit | df8386f75b0538075d72d52693836bb8878f505b (patch) | |
| tree | 704c2a0836f8b9fd9f470c12b6ae05637c431468 /structure/correlations/compute_rho.py | |
| parent | 363274e79eade464247089c105260bc34940da07 (diff) | |
First commit of MAMMULT code
Diffstat (limited to 'structure/correlations/compute_rho.py')
| -rw-r--r-- | structure/correlations/compute_rho.py | 118 | 
1 files changed, 118 insertions, 0 deletions
diff --git a/structure/correlations/compute_rho.py b/structure/correlations/compute_rho.py new file mode 100644 index 0000000..8fa4174 --- /dev/null +++ b/structure/correlations/compute_rho.py @@ -0,0 +1,118 @@ +#### +## +## Get two rankings and a pairing, and compute the corresponding +## Spearman's rho rank correlation coefficient +## +## +## +## + +import sys +import random +import math +#import scipy.stats + +## +## Compute the constant C of the Spearman's rho correlation coefficient in the draft +##  +def compute_C(rank1, rank2): +    N = len(rank1) +    [mu1, mu2] = [1.0 * sum(x.values()) / len(x.values()) for x in [rank1, rank2]] +    [sum1, sum2] = [1.0 * sum(x.values()) for x in [rank1, rank2]] +    C = N * mu1 * mu2 - mu2 * sum1 - mu1 * sum2 + +    #print mu1, mu2, sum1, sum2, C + +    return C + + +## +## Compute the constant D of the Spearman's rho correlation coefficient in the draft +##  +def compute_D(rank1, rank2): +    [mu1, mu2] = [1.0 * sum(x.values()) / len(x.values()) for x in [rank1, rank2]] +    s1 = sum([math.pow((x-mu1), 2) for x in rank1.values()]) +    s2 = sum([math.pow((x-mu2), 2) for x in rank2.values()]) +     +    D = math.sqrt(s1 * s2) +    return D + +def compute_rho(rank1, rank2, pairing, C, D): + +    rho = 0 +    for s,t in pairing: +        rho += rank1[s] * rank2[t] +    rho = (rho + C) / D +    return rho + +if len(sys.argv) < 3: +    print "Usage: %s <rank1> <rank2> [<pairing>]" % sys.argv[0] +    sys.exit(1) + +rank1 = {} +rank2 = {} + +lines = open(sys.argv[1], "r").readlines() + +i = 0 +for l in lines: +    if l[0] == "#" or l.strip(" \n").split(" ") == []: ## Strip comments and empty lines +        continue +    r = [float(x) if "." in x or "e" in x else int(x) for x in l.strip(" \n").split(" ")][0] +    rank1[i] = r +    i += 1 + + +lines = open(sys.argv[2], "r").readlines() + +i = 0 +for l in lines: +    if l[0] == "#" or l.strip(" \n").split(" ") == []: ## Strip comments and empty lines +        continue +    r = [float(x) if "." in x or "e" in x else int(x) for x in l.strip(" \n").split(" ")][0] +    rank2[i] = r +    i += 1 + + +N1 = len(rank1) +N2 = len(rank2) + +if (N1 != N2): +    print "The two files must have the same number of nodes!!!!!" +    sys.exit(2) + + + +C = compute_C(rank1, rank2) +D = compute_D(rank1, rank2) + + +## We start from a random pairing, and compute the corresponding value of rho + +pairing = [] + +if len(sys.argv) > 3: +    lines = open(sys.argv[3], "r").readlines() +     +     + +    for l in lines: +        if l[0] == "#" or l.strip(" \n").split(" ") == []: +            continue +        p1, p2 = [int(x) for x in l.strip(" \n").split(" ")][:2] +        pairing.append((p1, p2)) + +else: +    for i in range (N1): +        pairing.append((i,i)) + + +if len(pairing) != N1: +    print "ERROR !!! The pairing should be of the same length of the ranking files!!!" +    sys.exit(1) + +rho = compute_rho(rank1, rank2, pairing, C, D) + +print rho + +  | 
