summaryrefslogtreecommitdiff
path: root/structure/correlations/compute_rho.py
diff options
context:
space:
mode:
authorKatolaZ <katolaz@yahoo.it>2015-10-19 16:23:00 +0100
committerKatolaZ <katolaz@yahoo.it>2015-10-19 16:23:00 +0100
commitdf8386f75b0538075d72d52693836bb8878f505b (patch)
tree704c2a0836f8b9fd9f470c12b6ae05637c431468 /structure/correlations/compute_rho.py
parent363274e79eade464247089c105260bc34940da07 (diff)
First commit of MAMMULT code
Diffstat (limited to 'structure/correlations/compute_rho.py')
-rw-r--r--structure/correlations/compute_rho.py118
1 files changed, 118 insertions, 0 deletions
diff --git a/structure/correlations/compute_rho.py b/structure/correlations/compute_rho.py
new file mode 100644
index 0000000..8fa4174
--- /dev/null
+++ b/structure/correlations/compute_rho.py
@@ -0,0 +1,118 @@
+####
+##
+## Get two rankings and a pairing, and compute the corresponding
+## Spearman's rho rank correlation coefficient
+##
+##
+##
+##
+
+import sys
+import random
+import math
+#import scipy.stats
+
+##
+## Compute the constant C of the Spearman's rho correlation coefficient in the draft
+##
+def compute_C(rank1, rank2):
+ N = len(rank1)
+ [mu1, mu2] = [1.0 * sum(x.values()) / len(x.values()) for x in [rank1, rank2]]
+ [sum1, sum2] = [1.0 * sum(x.values()) for x in [rank1, rank2]]
+ C = N * mu1 * mu2 - mu2 * sum1 - mu1 * sum2
+
+ #print mu1, mu2, sum1, sum2, C
+
+ return C
+
+
+##
+## Compute the constant D of the Spearman's rho correlation coefficient in the draft
+##
+def compute_D(rank1, rank2):
+ [mu1, mu2] = [1.0 * sum(x.values()) / len(x.values()) for x in [rank1, rank2]]
+ s1 = sum([math.pow((x-mu1), 2) for x in rank1.values()])
+ s2 = sum([math.pow((x-mu2), 2) for x in rank2.values()])
+
+ D = math.sqrt(s1 * s2)
+ return D
+
+def compute_rho(rank1, rank2, pairing, C, D):
+
+ rho = 0
+ for s,t in pairing:
+ rho += rank1[s] * rank2[t]
+ rho = (rho + C) / D
+ return rho
+
+if len(sys.argv) < 3:
+ print "Usage: %s <rank1> <rank2> [<pairing>]" % sys.argv[0]
+ sys.exit(1)
+
+rank1 = {}
+rank2 = {}
+
+lines = open(sys.argv[1], "r").readlines()
+
+i = 0
+for l in lines:
+ if l[0] == "#" or l.strip(" \n").split(" ") == []: ## Strip comments and empty lines
+ continue
+ r = [float(x) if "." in x or "e" in x else int(x) for x in l.strip(" \n").split(" ")][0]
+ rank1[i] = r
+ i += 1
+
+
+lines = open(sys.argv[2], "r").readlines()
+
+i = 0
+for l in lines:
+ if l[0] == "#" or l.strip(" \n").split(" ") == []: ## Strip comments and empty lines
+ continue
+ r = [float(x) if "." in x or "e" in x else int(x) for x in l.strip(" \n").split(" ")][0]
+ rank2[i] = r
+ i += 1
+
+
+N1 = len(rank1)
+N2 = len(rank2)
+
+if (N1 != N2):
+ print "The two files must have the same number of nodes!!!!!"
+ sys.exit(2)
+
+
+
+C = compute_C(rank1, rank2)
+D = compute_D(rank1, rank2)
+
+
+## We start from a random pairing, and compute the corresponding value of rho
+
+pairing = []
+
+if len(sys.argv) > 3:
+ lines = open(sys.argv[3], "r").readlines()
+
+
+
+ for l in lines:
+ if l[0] == "#" or l.strip(" \n").split(" ") == []:
+ continue
+ p1, p2 = [int(x) for x in l.strip(" \n").split(" ")][:2]
+ pairing.append((p1, p2))
+
+else:
+ for i in range (N1):
+ pairing.append((i,i))
+
+
+if len(pairing) != N1:
+ print "ERROR !!! The pairing should be of the same length of the ranking files!!!"
+ sys.exit(1)
+
+rho = compute_rho(rank1, rank2, pairing, C, D)
+
+print rho
+
+