summaryrefslogtreecommitdiff
path: root/models/nullmodels
diff options
context:
space:
mode:
authorKatolaZ <katolaz@yahoo.it>2015-10-19 16:23:00 +0100
committerKatolaZ <katolaz@yahoo.it>2015-10-19 16:23:00 +0100
commitdf8386f75b0538075d72d52693836bb8878f505b (patch)
tree704c2a0836f8b9fd9f470c12b6ae05637c431468 /models/nullmodels
parent363274e79eade464247089c105260bc34940da07 (diff)
First commit of MAMMULT code
Diffstat (limited to 'models/nullmodels')
-rw-r--r--models/nullmodels/model_MDM.py66
-rw-r--r--models/nullmodels/model_MSM.py65
-rw-r--r--models/nullmodels/model_hypergeometric.py56
-rw-r--r--models/nullmodels/model_layer_growth.py121
4 files changed, 308 insertions, 0 deletions
diff --git a/models/nullmodels/model_MDM.py b/models/nullmodels/model_MDM.py
new file mode 100644
index 0000000..5b0a969
--- /dev/null
+++ b/models/nullmodels/model_MDM.py
@@ -0,0 +1,66 @@
+####
+##
+##
+## This is the vertical participation model. For each node i, we use
+## exactly the same value of B_i as in the original network, but we
+## choose at random the layers in which node i will be active. This
+## breaks down intra-layer correlations.
+##
+## We get as input a file which reports, for each value of B_i, the
+## number of nodes in the original network which have that value, in the format:
+##
+## B_i N(B_i)
+##
+##
+##
+## The output is the obtained distribution of bit-strings.
+##
+##
+
+import sys
+import random
+
+
+def to_binary(l):
+ s = 0
+ e = 0
+ for v in l:
+ s += v * pow(2,e)
+ e +=1
+ return s
+
+
+if len(sys.argv) < 3:
+ print "Usage: %s <Bi_file> <M>" % sys.argv[0]
+ sys.exit(1)
+
+M = int(sys.argv[2])
+
+layers = range(M)
+
+distr = {}
+
+with open(sys.argv[1], "r") as f:
+ for l in f:
+ if l[0] == "#":
+ continue
+ val, num = [int(x) for x in l.strip(" \n").split(" ")]
+ for j in range(num):
+ node_layers = random.sample(layers, val)
+ node_bitstring = [0 for x in range(M)]
+ #print node_bitstring, node_layers
+ for i in node_layers:
+ #print i,
+ node_bitstring[i] = 1
+ #print node_bitstring
+
+ bs = to_binary(node_bitstring)
+ if bs in distr:
+ distr[bs] += 1
+ else:
+ distr[bs] = 1
+
+for k in distr:
+ bin_list = bin(k)
+ bin_num = sum([int(x) if x=='1' else 0 for x in bin_list[2:]])
+ sys.stderr.write("%d %0175s %d \n" % (bin_num, bin_list[2:], distr[k]))
diff --git a/models/nullmodels/model_MSM.py b/models/nullmodels/model_MSM.py
new file mode 100644
index 0000000..2c30df5
--- /dev/null
+++ b/models/nullmodels/model_MSM.py
@@ -0,0 +1,65 @@
+####
+##
+##
+## Create a synthetic multiplex network in which a node $i$ appears at
+## each layer $\alpha$ with a probability equal to $B_i$, which is the
+## fraction of layers in which node $i$ participates in the original
+## multiplex.
+##
+## Take a file of node binary participation indices, and sample a
+## multiplex compatible with that distribution
+##
+##
+## The input file is in the format:
+##
+## nodeID_i B_i
+##
+## The output file is a node-layer participation file, in the format
+##
+## node_id1 layer_id1
+## node_id2 layer_id2
+## .....
+##
+
+import sys
+
+import random
+
+if len(sys.argv) < 3:
+ print "Usage: %s <filein> <M>" % sys.argv[0]
+ sys.exit(1)
+
+M = int(sys.argv[2])
+
+bin_index = {}
+node_ids = []
+
+lines = open(sys.argv[1]).readlines()
+
+
+for l in lines:
+ if l[0] == "#":
+ continue
+ elems = [int(x) for x in l.strip(" \n").split(" ")]
+ bin_index[elems[0]] = 1.0 * elems[1]/M
+ node_ids.append(elems[0])
+
+N = len(node_ids)
+
+node_layers = {}
+
+for alpha in range (M):
+ for i in node_ids:
+ val = random.random()
+ if val < bin_index[i]:
+ if node_layers.has_key(i):
+ node_layers[i].append(alpha)
+ else:
+ node_layers[i] = [alpha]
+
+
+for i in node_ids:
+ if node_layers.has_key(i):
+ for j in range(len(node_layers[i])):
+ print i, node_layers[i][j]
+
diff --git a/models/nullmodels/model_hypergeometric.py b/models/nullmodels/model_hypergeometric.py
new file mode 100644
index 0000000..0a36237
--- /dev/null
+++ b/models/nullmodels/model_hypergeometric.py
@@ -0,0 +1,56 @@
+####
+##
+## This is the hypergeometric model. Each layer has the same number of
+## non-isolated nodes as the initial graph, but the nodes are
+## activated at random. The input is a file of number of non-isolated
+## nodes in each layer, and the total number of nodes in the multiplex.
+##
+## The output file is a node-layer participation file, in the format
+##
+## node_id1 layer_id1
+## node_id2 layer_id2
+## .....
+##
+
+import sys
+import random
+
+if len(sys.argv) < 3:
+ print "Usage: %s <layer_N_file> <N>" % sys.argv[0]
+ sys.exit(1)
+
+N = int(sys.argv[2])
+
+layer_degs = []
+node_layers = {}
+
+lines = open(sys.argv[1]).readlines()
+
+M = 0
+
+
+for l in lines:
+ if l[0] == "#":
+ continue
+ n = [int(x) for x in l.strip(" \n").split(" ")][0]
+ layer_degs.append(n)
+ M += 1
+
+for i in range(M):
+ num = layer_degs[i]
+ added = []
+ n = 0
+ while n < num:
+ j = random.choice(range(N))
+ if j not in added:
+ n += 1
+ added.append(j)
+ if node_layers.has_key(j):
+ node_layers[j].append(i)
+ else:
+ node_layers[j] = [i]
+
+for n in node_layers.keys():
+ for i in node_layers[n]:
+ print n,i
+
diff --git a/models/nullmodels/model_layer_growth.py b/models/nullmodels/model_layer_growth.py
new file mode 100644
index 0000000..46b4c0f
--- /dev/null
+++ b/models/nullmodels/model_layer_growth.py
@@ -0,0 +1,121 @@
+####
+##
+## layer-by-layer multiplex growth
+##
+## - We start from a multiplex with M_0 layers, with a certain number of
+## active nodes each
+##
+## - Each new layer arrives with a certain number N\lay{\alpha} of nodes
+## to be activated (this number is sampled from the observed distribution
+## of N\lay{\alpha}, like in the airlines multiplex)
+##
+## - Each node $i$ is activated with a probability proportional to the
+## number of existing layers in which it is already active, added to an
+## attractivity A :
+##
+## P_i(t) \propto A + B_i(t)
+##
+## - the hope is that A might tune the exponent of the resulting distribution
+## of B_i.....
+##
+##
+## This script takes as input a file which contains the degrees of the
+## layers, the total number of nodes in the multiplex, the initial
+## number M0 of layers in the multiplex and the attractiveness
+## parameter A. If "RND" is specified as a third parameter, then the
+## sequence of N\lay{\alpha} is shuffled
+##
+## Gives as output a list of node-layer participation
+##
+
+import sys
+import random
+
+if len(sys.argv) < 5:
+ print "Usage: %s <layers_N> <N> <M0> <A> [RND]" % sys.argv[0]
+ sys.exit(1)
+
+N = int(sys.argv[2])
+M0 = int(sys.argv[3])
+A = int(sys.argv[4])
+
+layer_degs = []
+
+
+if len(sys.argv) > 5 and sys.argv[5] == "RND":
+ randomize = True
+else:
+ randomize = False
+
+lines = open(sys.argv[1]).readlines()
+
+M = 0
+
+
+for l in lines:
+ if l[0] == "#":
+ continue
+ n = [int(x) for x in l.strip(" \n").split(" ")][0]
+ layer_degs.append(n)
+ M += 1
+
+
+if randomize:
+ random.shuffle(layer_degs)
+
+## the list node_Bi contains, at each time, the attachment
+## probabilities, i.e. it is a list which contains each node $i$
+## exactly $A + B_i$ times
+
+node_Bi = []
+
+#
+# initialize the distribution of attachment probabilities, giving to
+# all nodes an attachment probability equal to A
+#
+
+for i in range(N):
+ for j in range(A):
+ node_Bi.append(i)
+
+layers = []
+
+
+for i in range(M0):
+ N_alpha = layer_degs.pop()
+ node_list = []
+ num_nodes = 0
+ while num_nodes < N_alpha:
+ val = random.choice(range(N))
+ if val not in node_list:
+ node_list.append(val)
+ num_nodes += 1
+ for n in node_list:
+ node_Bi.append(n)
+ layers.append(node_list)
+ i += 1
+
+
+#sys.exit(1)
+
+while i < M:
+ N_alpha = layer_degs.pop()
+ node_list = []
+ num_nodes = 0
+ while num_nodes < N_alpha:
+ val = random.choice(node_Bi)
+ if val not in node_list:
+ node_list.append(val)
+ num_nodes += 1
+ for n in node_list:
+ node_Bi.append(n)
+ layers.append(node_list)
+ i += 1
+
+#print layers
+
+for i in range(M):
+ node_list = layers[i]
+ for n in node_list:
+ print n, i
+