diff options
author | KatolaZ <katolaz@yahoo.it> | 2015-10-19 16:23:00 +0100 |
---|---|---|
committer | KatolaZ <katolaz@yahoo.it> | 2015-10-19 16:23:00 +0100 |
commit | df8386f75b0538075d72d52693836bb8878f505b (patch) | |
tree | 704c2a0836f8b9fd9f470c12b6ae05637c431468 /structure/correlations/compute_rho.py | |
parent | 363274e79eade464247089c105260bc34940da07 (diff) |
First commit of MAMMULT code
Diffstat (limited to 'structure/correlations/compute_rho.py')
-rw-r--r-- | structure/correlations/compute_rho.py | 118 |
1 files changed, 118 insertions, 0 deletions
diff --git a/structure/correlations/compute_rho.py b/structure/correlations/compute_rho.py new file mode 100644 index 0000000..8fa4174 --- /dev/null +++ b/structure/correlations/compute_rho.py @@ -0,0 +1,118 @@ +#### +## +## Get two rankings and a pairing, and compute the corresponding +## Spearman's rho rank correlation coefficient +## +## +## +## + +import sys +import random +import math +#import scipy.stats + +## +## Compute the constant C of the Spearman's rho correlation coefficient in the draft +## +def compute_C(rank1, rank2): + N = len(rank1) + [mu1, mu2] = [1.0 * sum(x.values()) / len(x.values()) for x in [rank1, rank2]] + [sum1, sum2] = [1.0 * sum(x.values()) for x in [rank1, rank2]] + C = N * mu1 * mu2 - mu2 * sum1 - mu1 * sum2 + + #print mu1, mu2, sum1, sum2, C + + return C + + +## +## Compute the constant D of the Spearman's rho correlation coefficient in the draft +## +def compute_D(rank1, rank2): + [mu1, mu2] = [1.0 * sum(x.values()) / len(x.values()) for x in [rank1, rank2]] + s1 = sum([math.pow((x-mu1), 2) for x in rank1.values()]) + s2 = sum([math.pow((x-mu2), 2) for x in rank2.values()]) + + D = math.sqrt(s1 * s2) + return D + +def compute_rho(rank1, rank2, pairing, C, D): + + rho = 0 + for s,t in pairing: + rho += rank1[s] * rank2[t] + rho = (rho + C) / D + return rho + +if len(sys.argv) < 3: + print "Usage: %s <rank1> <rank2> [<pairing>]" % sys.argv[0] + sys.exit(1) + +rank1 = {} +rank2 = {} + +lines = open(sys.argv[1], "r").readlines() + +i = 0 +for l in lines: + if l[0] == "#" or l.strip(" \n").split(" ") == []: ## Strip comments and empty lines + continue + r = [float(x) if "." in x or "e" in x else int(x) for x in l.strip(" \n").split(" ")][0] + rank1[i] = r + i += 1 + + +lines = open(sys.argv[2], "r").readlines() + +i = 0 +for l in lines: + if l[0] == "#" or l.strip(" \n").split(" ") == []: ## Strip comments and empty lines + continue + r = [float(x) if "." in x or "e" in x else int(x) for x in l.strip(" \n").split(" ")][0] + rank2[i] = r + i += 1 + + +N1 = len(rank1) +N2 = len(rank2) + +if (N1 != N2): + print "The two files must have the same number of nodes!!!!!" + sys.exit(2) + + + +C = compute_C(rank1, rank2) +D = compute_D(rank1, rank2) + + +## We start from a random pairing, and compute the corresponding value of rho + +pairing = [] + +if len(sys.argv) > 3: + lines = open(sys.argv[3], "r").readlines() + + + + for l in lines: + if l[0] == "#" or l.strip(" \n").split(" ") == []: + continue + p1, p2 = [int(x) for x in l.strip(" \n").split(" ")][:2] + pairing.append((p1, p2)) + +else: + for i in range (N1): + pairing.append((i,i)) + + +if len(pairing) != N1: + print "ERROR !!! The pairing should be of the same length of the ranking files!!!" + sys.exit(1) + +rho = compute_rho(rank1, rank2, pairing, C, D) + +print rho + + |