# This file is part of MAMMULT: Metrics And Models for Multilayer Networks
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
####
##
## Take as input two files, whose n^th line contains the ranking of
## element n, and compute the Kendall's \tau_b rank correlation
## coefficient
##
##
import sys
from numpy import *
def kendalltau(x,y):
initial_sort_with_lexsort = True # if True, ~30% slower (but faster under profiler!) but with better worst case (O(n log(n)) than (quick)sort (O(n^2))
n = len(x)
temp = range(n) # support structure used by mergesort
# this closure recursively sorts sections of perm[] by comparing
# elements of y[perm[]] using temp[] as support
# returns the number of swaps required by an equivalent bubble sort
def mergesort(offs, length):
exchcnt = 0
if length == 1:
return 0
if length == 2:
if y[perm[offs]] <= y[perm[offs+1]]:
return 0
t = perm[offs]
perm[offs] = perm[offs+1]
perm[offs+1] = t
return 1
length0 = length / 2
length1 = length - length0
middle = offs + length0
exchcnt += mergesort(offs, length0)
exchcnt += mergesort(middle, length1)
if y[perm[middle - 1]] < y[perm[middle]]:
return exchcnt
# merging
i = j = k = 0
while j < length0 or k < length1:
if k >= length1 or (j < length0 and y[perm[offs + j]] <= y[perm[middle + k]]):
temp[i] = perm[offs + j]
d = i - j
j += 1
else:
temp[i] = perm[middle + k]
d = (offs + i) - (middle + k)
k += 1
if d > 0:
exchcnt += d;
i += 1
perm[offs:offs+length] = temp[0:length]
return exchcnt
# initial sort on values of x and, if tied, on values of y
if initial_sort_with_lexsort:
# sort implemented as mergesort, worst case: O(n log(n))
perm = lexsort((y, x))
else:
# sort implemented as quicksort, 30% faster but with worst case: O(n^2)
perm = range(n)
perm.sort(lambda a,b: cmp(x[a],x[b]) or cmp(y[a],y[b]))
# compute joint ties
first = 0
t = 0
for i in xrange(1,n):
if x[perm[first]] != x[perm[i]] or y[perm[first]] != y[perm[i]]:
t += ((i - first) * (i - first - 1)) / 2
first = i
t += ((n - first) * (n - first - 1)) / 2
# compute ties in x
first = 0
u = 0
for i in xrange(1,n):
if x[perm[first]] != x[perm[i]]:
u += ((i - first) * (i - first - 1)) / 2
first = i
u += ((n - first) * (n - first - 1)) / 2
# count exchanges
exchanges = mergesort(0, n)
# compute ties in y after mergesort with counting
first = 0
v = 0
for i in xrange(1,n):
if y[perm[first]] != y[perm[i]]:
v += ((i - first) * (i - first - 1)) / 2
first = i
v += ((n - first) * (n - first - 1)) / 2
tot = (n * (n - 1)) / 2
if tot == u and tot == v:
return 1 # Special case for all ties in both ranks
tau = ((tot-(v+u-t)) - 2.0 * exchanges) / (sqrt(float(( tot - u )) * float( tot - v )))
# what follows reproduces ending of Gary Strangman's original stats.kendalltau() in SciPy
svar = (4.0*n+10.0) / (9.0*n*(n-1))
z = tau / sqrt(svar)
##prob = erfc(abs(z)/1.4142136)
##return tau, prob
return tau
def main():
if len(sys.argv) < 3:
print "Usage: %s " % sys.argv[0]
sys.exit(1)
x1 = []
x2= []
lines = open(sys.argv[1]).readlines()
for l in lines:
elem = [float(x) if "e" in x or "." in x else int(x) for x in l.strip(" \n").split()][0]
x1.append(elem)
lines = open(sys.argv[2]).readlines()
for l in lines:
elem = [float(x) if "e" in x or "." in x else int(x) for x in l.strip(" \n").split()][0]
x2.append(elem)
tau = kendalltau(x1,x2)
print tau
if __name__ == "__main__":
main()