From 3aee2fd43e3059a699af2b63c6f2395e5a55e515 Mon Sep 17 00:00:00 2001
From: KatolaZ <katolaz@freaknet.org>
Date: Wed, 27 Sep 2017 15:06:31 +0100
Subject: First commit on github -- NetBunch 1.0

---
 src/f3m/f3m.c | 657 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 657 insertions(+)
 create mode 100644 src/f3m/f3m.c

(limited to 'src/f3m/f3m.c')

diff --git a/src/f3m/f3m.c b/src/f3m/f3m.c
new file mode 100644
index 0000000..cfe1146
--- /dev/null
+++ b/src/f3m/f3m.c
@@ -0,0 +1,657 @@
+/**
+ *  This program is free software: you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License as
+ *  published by the Free Software Foundation, either version 3 of the
+ *  License, or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see
+ *  <http://www.gnu.org/licenses/>.
+ *
+ *  (c) Vincenzo Nicosia 2009-2017 -- <v.nicosia@qmul.ac.uk>
+ * 
+ *  This file is part of NetBunch, a package for complex network
+ *  analysis and modelling. For more information please visit:
+ *
+ *             http://www.complex-networks.net/
+ *
+ *  If you use this software, please add a reference to 
+ *
+ *               V. Latora, V. Nicosia, G. Russo             
+ *   "Complex Networks: Principles, Methods and Applications"
+ *              Cambridge University Press (2017) 
+ *                ISBN: 9781107103184
+ *
+ ***********************************************************************
+ *
+ *  Enumerate all the three-nodes subgraphs in a directed network, and
+ *  compute the significance of their number with respect to the
+ *  corresponding configuration model ensemble.
+ * 
+ *  References:
+ *
+ * [1] R. Milo et al. "Network Motifs: Simple Building Blocks of
+ *     Complex Networks".  Science 298 (2002), 824-827.  
+ * 
+ * [2] R. Milo et al. "Superfamilies of evolved and designed
+ *     networks." Science 303 (2004), 1538-1542
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <math.h>
+
+#include "utils.h"
+
+
+
+void usage(char *argv[]){
+
+  printf("********************************************************************\n"
+         "**                                                                **\n"
+         "**                        -*-  f3m  -*-                           **\n"
+         "**                                                                **\n"
+         "**  Count all the 3-node subgraphs of a directed graph given as   **\n"
+         "**  input, and compute the relevance (z-score) of each motif      **\n"
+         "**  with respect to the corresponding configuration model graph   **\n"
+         "**  ensemble.                                                     **\n"
+         "**                                                                **\n"
+         "**  The file 'graph_in' contains the edge list of the graph.      **\n"
+         "**                                                                **\n"
+         "**  The program prints on STDOUT one line for each of the 13      **\n"
+         "**  motifs, in the format                                         **\n"
+         "**                                                                **\n"
+         "**      motif    count    mean_rnd     std_rnd     z-score        **\n"
+         "**                                                                **\n"
+         "**  where 'motif' is the motif number (an integer between 1 and   **\n"
+         "**  13), 'count' is the number of subgraphs of that type found    **\n"
+         "**  in 'graph_in', 'mean_rnd' is the average number of those      **\n"
+         "**  subgraphs found in the randomised realisations of the graph,  **\n"
+         "**  'std_rnd' is the standard deviation associated to 'avg_rnd',  **\n"
+         "**  and 'z-score' is the normalised deviation of 'count' from     **\n"
+         "**  'mean_rnd'.                                                   **\n"
+         "**                                                                **\n"
+         "**  If the (optional) parameter 'num_random' is provided, use     **\n"
+         "**  that number of random realisations to compute the z-score.    **\n"
+         "**                                                                **\n"
+         "********************************************************************\n"
+         " This is Free Software - You can use and distribute it under \n"
+         " the terms of the GNU General Public License, version 3 or later\n\n"
+         " Please visit http://www.complex-networks.net for more information\n\n"
+         " (c) Vincenzo Nicosia 2009-2017 (v.nicosia@qmul.ac.uk)\n"
+         "********************************************************************\n\n"
+         );
+  printf("Usage: %s <graph_in> [<num_random>]\n", argv[0]);
+}
+
+
+
+
+
+
+#define MIN(x, y) ((x) < (y) ? (x) : (y))
+
+
+typedef struct{
+  unsigned int N;
+  unsigned int K;
+  unsigned int *J_slap;
+  unsigned int *r_slap;
+} graph_t;
+
+
+typedef struct{
+  double f_count_real[13];
+  int num_rnd;
+  double **f_count_rnd;
+} mstats_t;
+
+
+char perm12[3][3] = {{0, 1, 0}, 
+                    {1, 0, 0},
+                    {0, 0, 1}};
+
+char perm13[3][3] = {{0, 0, 1}, 
+                    {0, 1, 0},
+                    {1, 0, 0}};
+
+char perm23[3][3] = {{1, 0, 0}, 
+                    {0, 0, 1},
+                    {0, 1, 0}};
+
+
+
+void shuffle_list(unsigned int *v, unsigned int K){
+  
+  int i, pos;
+  
+  for(i=K-1; i>=0; i--){
+    pos = rand() % K;
+    if (pos != i){
+      v[i] ^= v[pos];
+      v[pos] ^= v[i];
+      v[i] ^= v[pos];
+    }
+  }
+}
+
+int is_simple_graph(unsigned int *J_slap, unsigned int *r_slap, unsigned int K, 
+                    unsigned int N){
+  
+  int i, j;
+  for(i=0; i<N; i++){
+    for(j=r_slap[i]; j<r_slap[i+1]; j++){
+      if (J_slap[j] == i) /* If there is a self-loop....*/
+        return 0;
+      if (j > r_slap[i] && J_slap[j] == J_slap[j-1]) /* or a double edge... */
+        return 0;
+    }
+  }
+  return 1;
+}
+
+int is_loop_free(unsigned int *J_slap, unsigned int *r_slap, unsigned int K, 
+                    unsigned int N){
+  
+  int i, j;
+  for(i=0; i<N; i++){
+    for(j=r_slap[i]; j<r_slap[i+1]; j++){
+      if (J_slap[j] == i) /* There is a self-loop....*/
+        return 0;
+    }
+  }
+  return 1;
+}
+
+
+
+unsigned int* sample_conf_model_smart(unsigned int *J_slap, unsigned int *r_slap, 
+                                      unsigned int K, unsigned int N){
+  
+  unsigned int *new_J;
+
+  new_J = malloc( K * sizeof(unsigned int));
+  
+  memcpy(new_J, J_slap, K *sizeof(unsigned int));
+
+  while(1){
+    shuffle_list(new_J, K);
+    sort_neighbours(new_J, r_slap, N);
+    if(is_loop_free(new_J, r_slap, K, N))
+      break;
+  }
+  return new_J;
+}
+
+
+
+void apply_perm_3(char m[3][3], char p[3][3]){
+
+  char res[3][3];
+  
+  int i, j, k;
+  for (i=0; i<3; i++){
+    for(j=0; j<3; j++){
+      res[i][j] = 0;
+      for(k=0; k<3; k++){
+        res[i][j] += p[i][k] * m[k][j];
+      }
+    }
+  }
+
+  for (i=0; i<3; i++){
+    for(j=0; j<3; j++){
+      m[i][j] = 0;
+      for(k=0; k<3; k++){
+        m[i][j] += res[i][k] * p[k][j];
+      }
+    }
+  }
+
+}
+
+int row_value(char r[3]){ /* The "value" of a row of bits is
+                                    equal to the binary representation
+                                    of the row, in big-endian (i.e.,
+                                    LSB is r[0], MSB is r[2])*/
+  
+  return r[0] + (r[1]<<1) + (r[2]<<2);
+}
+
+int matrix_value(char m[3][3]){ /* The value of a matrix of
+                                          bits is equal to the binary
+                                          representation of the
+                                          matrix, in big endian,
+                                          starting from the first row
+                                          (LSB is m[0][0], MSB is
+                                          m[2][2])*/
+  
+  return row_value(m[0]) + (row_value(m[1])<<3) + (row_value(m[2])<<6);
+}
+
+
+void permute_matrix_3(char m[3][3], int n1, int n2){
+  
+  int perm;
+
+  if (n1 == n2){
+    return;
+  }
+  if (n1 > n2){
+    n1 ^= n2;
+    n2 ^= n1;
+    n1 ^= n2;
+  }
+  
+  perm = n1 + (n2<<2);
+  
+  switch(perm){
+  case (1 + (2<<2)): /* permute 1 with 2 */
+    apply_perm_3(m, perm12);
+    break;
+  case (1 + (3<<2)): /* permute 1 with 3 */
+    apply_perm_3(m, perm13);
+    break;
+  case (2 + (3<<2)): /* permute 2 with 3 */
+    apply_perm_3(m, perm23);
+    break;
+  }
+}
+
+
+
+/* Load the input graph. We construct two versions of the graph,
+   i.e. the directed versions G_out ( containing the list of
+   out-neighbours of each node) and the underlying undirected graph
+   G_u
+   
+   N.B.: This is quite inefficient at the moment, since it reads the
+   file twice, and could be replaced by one call to read_ij and two
+   appropriate calls to convert_ij2slap.... */
+
+void load_graph(FILE *fin, graph_t *G_u, graph_t *G_out){
+
+  /*FIXME!!!! WE CANNOT REWIND THE STANDARD OUTPUT !!!!! */
+  read_slap(fin, &(G_u->K), &(G_u->N), &(G_u->J_slap), &(G_u->r_slap));
+  sort_neighbours(G_u->J_slap, G_u->r_slap, G_u->N);
+  rewind(fin);
+  read_slap_dir(fin, &(G_out->K), &(G_out->N), &(G_out->J_slap), &(G_out->r_slap));
+  sort_neighbours(G_out->J_slap, G_out->r_slap, G_out->N);
+  rewind(fin);
+  
+}
+
+
+void dump_matrix_3(char m[3][3]){
+  
+  int i, j;
+
+  for(i=0; i<3; i++){
+    for(j=0; j<3; j++){
+      printf("%d ", m[i][j]);
+    }
+    printf("\n");
+  }
+}
+
+
+int motif_number(char m[3][3]){
+  
+  char m0[3][3];
+  char m1[3][3];
+  char m2[3][3];
+  char m3[3][3];
+  
+  int v, v0, v1, v2, v3, v4, v5;
+  int i,j;
+  
+  for(i=0; i<3; i++){
+    for(j=0; j<3; j++){
+      m0[i][j] = m[i][j];
+    }
+  }
+  
+  if (row_value(m[0]) == 0){
+    permute_matrix_3(m0, 1, 2);
+  }
+  if (row_value(m0[1]) == 0){
+    permute_matrix_3(m0, 2, 3);
+  }
+  
+  
+  for(i=0; i<3; i++){
+    for(j=0; j<3; j++){
+      m1[i][j] = m0[i][j];
+      m2[i][j] = m0[i][j];
+      m3[i][j] = m0[i][j];
+    }
+  }
+  
+  /* We consider here all the 6 possible permutations... */
+  
+  /* {0, 1, 2} */
+  v0 = matrix_value(m0);
+  /* {1, 0, 2} */
+  permute_matrix_3(m1, 1, 2);
+  v1 = matrix_value(m1);
+  /* {2, 1, 0} */
+  permute_matrix_3(m2, 1, 3);
+  v2 = matrix_value(m2);
+  /* {0, 2, 1} */
+  permute_matrix_3(m3, 2, 3);
+  v3 = matrix_value(m3);
+  /* {1, 2, 0} */
+  permute_matrix_3(m2, 1, 2);
+  v4 = matrix_value(m2);
+  /* {2, 0, 1} */
+  permute_matrix_3(m3, 1, 2);
+  v5 = matrix_value(m3);
+  
+  v = MIN (MIN( MIN( MIN( MIN( v0, v1), v2), v3), v4), v5);
+  
+  switch(v){
+  case 6:
+    return 0;
+  case 12:
+    return 1;
+  case 14:
+    return 2;
+  case 36:
+    return 3;
+  case 38:
+    return 4;
+  case 46:
+    return 5;
+  case 74:
+    return 6;
+  case 78:
+    return 7;
+  case 98:
+    return 8;
+  case 102:
+    return 9;
+  case 108:
+    return 10;
+  case 110:
+    return 11;
+  case 238:
+    return 12;
+  default:
+    fprintf(stderr, "No motif with number %d! Exiting\n", v);
+    dump_matrix_3(m);
+    exit(5);
+  }
+}
+
+int get_motif_3(int n1, int n2, int n3, graph_t *G_out){
+  
+  char m[3][3];
+  unsigned int n[3] = {n1, n2, n3};
+  
+  int i, j, v;
+  
+  for(i=0; i<3; i++){
+    for (j=0; j<3; j++){
+      if (is_neigh(G_out->J_slap, G_out->r_slap, G_out->N, 
+                      n[i], n[j])){
+        m[i][j] = 1;
+      }
+      else{
+        m[i][j] = 0;
+      }
+    }
+  }
+  v = motif_number(m);
+  return v;
+}
+
+
+
+void find_subgraphs_3(graph_t *G_u, graph_t *G_out, double *f_cnt){
+  
+  int i, j, k, n1, n2;
+  int val;
+
+  for (i=0; i<G_u->N; i++){
+    for(n1 = G_u->r_slap[i]; n1<G_u->r_slap[i+1]; n1++){
+      /* j is a first-neighbour of i in G_u */
+      j = G_u->J_slap[n1];
+      /* avoid multiple entries in the J_slap vector */
+      if (n1 > G_u->r_slap[i] && j == G_u->J_slap[n1-1])
+        continue;
+      for(n2 = n1+1; n2 < G_u->r_slap[i+1]; n2++){
+        /* and k is another first neighbour of i in G_u */
+        k = G_u->J_slap[n2];
+      /* avoid multiple entries in the J_slap vector */
+        if (n2 > n1+1 && k == G_u->J_slap[n2-1])
+          continue;
+        /* now, if j and k are connected by an edge, we consider this
+           triangle only if i<j<k (in order to avoid multiple counts).
+           Otherwise, if i-j-k is an open triad, we have to consider
+           it now, because there is no other possibility to discover
+           it */
+        if((is_neigh(G_u->J_slap, G_u->r_slap, G_u->N, j, k) && 
+            (j < i || k < j || k < i)) || (j==k))
+          continue;
+        val = get_motif_3(i, j, k, G_out);
+        f_cnt[val] +=1;
+      }
+    }
+  }
+  
+}
+
+void init_graph(graph_t *G1){
+  G1->J_slap = G1->r_slap = NULL;
+}
+
+void init_stats(mstats_t *st, int n_rand){
+  int i;
+
+  st->f_count_rnd = malloc(n_rand * sizeof(double*));
+  
+  st->num_rnd = n_rand;
+
+  for(i=0; i<13; i++){
+    st->f_count_real[i] = 0;
+  }
+  
+  for(i=0; i<n_rand; i++){
+    st->f_count_rnd[i] = malloc(13 * sizeof(double));
+    memset(st->f_count_rnd[i], 0, 13 * sizeof(double));
+  }
+}
+
+
+void compute_rnd_st_mean_std(mstats_t *st, double *mean, double *std){
+  
+  double sum[13], sum2[13];
+  double val, n;
+
+  int i, j;
+
+  
+  n = st->num_rnd;
+
+  for (i=0; i<13; i++){
+    sum[i] = sum2[i] = 0;
+  }
+
+  if (n == 0)
+    return;
+  
+  for(i=0; i<n; i++){
+    for(j=0; j<13; j++){
+      val = st->f_count_rnd[i][j];
+      sum[j] += val;
+      sum2[j] += val*val;
+    }
+  }
+  
+  for(i=0; i<13; i++){
+    mean[i] = sum[i] / n;
+    if (sum2[i] > 0)
+      std[i] = sqrt(sum2[i] * 1.0/(n-1) - 1.0/( n * (n-1)) * sum[i]*sum[i]);
+    else
+      std[i] = 0.0;
+  }
+}
+
+
+
+ 
+
+void dump_stats(mstats_t *st){
+  
+  int i;
+  double v_mean[13], v_std[13], x;
+
+  memset(v_mean, 0, 13 * sizeof(double));
+  memset(v_std, 0, 13 * sizeof(double));
+  
+  compute_rnd_st_mean_std(st, v_mean, v_std);
+  for(i=0; i<13; i++){
+    x = st->f_count_real[i];
+    if (v_std[i] > 0)
+      printf("%-2d %12.0f %15.2f %10.3f %+10.3f\n", i+1, x, 
+             v_mean[i], v_std[i], 1.0 * (x - v_mean[i])/v_std[i] );
+    else
+      printf("%-2d %12.0f %15.2f %10.3f %+10.3f\n", i+1, x, 
+             v_mean[i], v_std[i], 0.0);
+    
+  }
+}
+
+void randomise_graph(graph_t *G_out, graph_t *RNDG_out, graph_t *RNDG_u){
+  
+  static unsigned int *I, *J; 
+  static unsigned int I_size, J_size;
+  unsigned int *tmp;
+
+  if (!I || I_size < 2*G_out->K){
+    tmp = realloc(I, G_out -> K * 2 * sizeof(unsigned int)); 
+    VALID_PTR_OR_EXIT(tmp, 3);
+    I = tmp;
+    I_size = 2*G_out->K;
+  }
+
+
+  if (!J || J_size < 2*G_out->K){
+    tmp = realloc(J, G_out -> K * 2 * sizeof(unsigned int)); 
+    VALID_PTR_OR_EXIT(tmp, 3);
+    J = tmp;
+    J_size = 2*G_out->K;
+  }
+  
+  if (RNDG_out->J_slap){
+    free(RNDG_out->J_slap);
+    RNDG_out->J_slap = NULL;
+  }
+  
+  
+  RNDG_out->J_slap = sample_conf_model_smart(G_out->J_slap, G_out->r_slap, G_out->K, G_out->N);
+
+  tmp = realloc(RNDG_out->r_slap, (G_out->N + 1) * sizeof(unsigned int));
+  VALID_PTR_OR_EXIT(tmp, 19);
+  RNDG_out->r_slap = tmp;
+  memcpy(RNDG_out->r_slap, G_out->r_slap, (G_out->N + 1) * sizeof(unsigned int));
+  RNDG_out->N = G_out->N;
+  RNDG_out->K = G_out->K;
+
+
+  convert_slap2ij(RNDG_out->J_slap, RNDG_out->r_slap, RNDG_out->N, I, J);
+  
+  /* copy J at the end of I */
+  memcpy(&(I[G_out->K]), J, G_out->K * sizeof(unsigned int));
+  /* copy I at the end of J */
+  memcpy(&(J[G_out->K]), I, G_out->K * sizeof(unsigned int));
+
+
+  RNDG_u->N = convert_ij2slap(I, J, 2*G_out->K, & (RNDG_u->r_slap), &(RNDG_u->J_slap));
+  
+  RNDG_u->K = 2 * G_out->K;
+
+  sort_neighbours(RNDG_u->J_slap, RNDG_u->r_slap, RNDG_u->N);
+  sort_neighbours(RNDG_out->J_slap, RNDG_out->r_slap, RNDG_out->N);
+  
+  if (!is_loop_free(RNDG_u->J_slap, RNDG_u->r_slap, RNDG_u->K, RNDG_u->N)){
+    fprintf(stderr, "Error!!!! The undirected version of the graph is not loop-free!!!!\n");
+    exit(23);
+  }
+  
+}
+
+
+int main(int argc, char *argv[]){
+
+  graph_t G_u, G_out, RNDG_u, RNDG_out;
+  mstats_t st;
+  FILE *filein;
+  unsigned int nr;
+  
+  int i;
+  
+  if(argc < 2){
+    usage(argv);
+    exit(1);
+  }
+  filein = openfile_or_exit(argv[1], "r", 2);
+
+  if (argc > 2){
+    nr = atoi(argv[2]);
+  }
+  else{
+    nr = 0;
+  }
+  
+  init_stats(&st, nr);
+  init_graph(&G_u);
+  init_graph(&G_out);
+  
+  load_graph(filein, &G_u, &G_out);
+
+  fclose(filein);
+  
+  find_subgraphs_3(&G_u, &G_out, st.f_count_real);
+
+  srand(time(NULL));
+  
+  /* Now we create n_r random networks with the same degree
+     distribution, and we perform motifs analysis on each of them */
+
+  init_graph(&RNDG_out);
+  init_graph(&RNDG_u);
+  
+  for(i=0; i<nr; i++){
+    /* Create the random graph */
+    randomise_graph(&G_out, &RNDG_out, &RNDG_u);
+    /* call find_subgraphs_3 in it */
+    find_subgraphs_3(&RNDG_u, &RNDG_out, st.f_count_rnd[i]);
+    //show_progress(stderr, "Randomised networks: ", i+1, nr);
+  }
+  //fprintf(stderr,"\n");
+  
+  /* Now we should print the results on output  */
+  
+  dump_stats(&st);
+
+  free(G_u.J_slap);
+  free(G_u.r_slap);
+  free(G_out.J_slap);
+  free(G_out.r_slap);
+
+  free(RNDG_u.J_slap);
+  free(RNDG_u.r_slap);
+  free(RNDG_out.J_slap);
+  free(RNDG_out.r_slap);
+
+  return 0;
+}
-- 
cgit v1.2.3