diff options
Diffstat (limited to 'src/label_prop/label_prop.c')
-rw-r--r-- | src/label_prop/label_prop.c | 415 |
1 files changed, 415 insertions, 0 deletions
diff --git a/src/label_prop/label_prop.c b/src/label_prop/label_prop.c new file mode 100644 index 0000000..3488834 --- /dev/null +++ b/src/label_prop/label_prop.c @@ -0,0 +1,415 @@ +/** + * This program is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see + * <http://www.gnu.org/licenses/>. + * + * (c) Vincenzo Nicosia 2009-2017 -- <v.nicosia@qmul.ac.uk> + * + * This file is part of NetBunch, a package for complex network + * analysis and modelling. For more information please visit: + * + * http://www.complex-networks.net/ + * + * If you use this software, please add a reference to + * + * V. Latora, V. Nicosia, G. Russo + * "Complex Networks: Principles, Methods and Applications" + * Cambridge University Press (2017) + * ISBN: 9781107103184 + * + *********************************************************************** + * + * This program finds the communities in a graph using the + * label-propagation algorithm proposed by Raghavan, Albert, and + * Kumara. + * + * References: + * + * [1] U. N. Raghavan, R. Albert, and S. Kumara. "Near linear time + * algorithm to detect community structures in large-scale + * networks". Phys. Rev. E 76 (2007), 036106. + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <math.h> + + +#include "iltree.h" +#include "utils.h" + + +typedef struct{ + int label; + int freq; +} label_freq_t; + +#define MODE_SYNC 0x0 +#define MODE_ASYNC 0x1 + + + +/* Usage */ +void usage(char *argv[]){ + printf("********************************************************************\n" + "** **\n" + "** -*- label_prop -*- **\n" + "** **\n" + "** Find the communities in 'graph_in' using the label **\n" + "** propagation algorithm. **\n" + "** **\n" + "** The first parameter is used to choose between synchronous **\n" + "** (SYNC) and asynchronous (ASYNC) update. **\n" + "** **\n" + "** The input file 'graph_in' is an edge-list. **\n" + "** If 'graph_in' is equal to '-' (dash), read the file from **\n" + "** the standard input (STDIN). **\n" + "** **\n" + "** If 'max_epochs' is specified, the program stops after **\n" + "** 'max_epochs' epochs (useful in conjunction with SYNC, to **\n" + "** exit from loops). **\n" + "** **\n" + "** The program prints on STDOUT the partition obtained when **\n" + "** no more label flips are possible, in the format: **\n" + "** **\n" + "** node_1 comm_1 **\n" + "** node_2 comm_2 **\n" + "** node_3 comm_3 **\n" + "** ..... **\n" + "** **\n" + "** where 'comm_1' is the community to which 'node_1' belongs. **\n" + "** **\n" + "** The program prints on STDERR one line for each epoch, **\n" + "** in the format: **\n" + "** **\n" + "** epoch_1 Q_1 flips_1 **\n" + "** epoch_2 Q_2 flips_2 **\n" + "** ..... **\n" + "** **\n" + "** where 'epoch_i' is the epoch number, 'Q_i' is the modularity **\n" + "** of the partition found at that epoch, and 'flips_i' is the **\n" + "** number of label flips occurred in 'epoch_i'. **\n" + "** **\n" + "********************************************************************\n" + " This is Free Software - You can use and distribute it under \n" + " the terms of the GNU General Public License, version 3 or later\n\n" + " Please visit http://www.complex-networks.net for more information\n\n" + " (c) Vincenzo Nicosia 2009-2017 (v.nicosia@qmul.ac.uk)\n" + "********************************************************************\n\n" + ); + printf("Usage: %s [SYNC|ASYNC] <graph_in> [<max_epochs>]\n\n" , argv[0]); +} + + + + +/* Compare the frequency of two labels and return a value which allows + to sort them in reverse order (i.e., -v, if v=f1-f2) */ + +int compare_label_freq_reverse(const void *e1, const void *e2){ + + label_freq_t v1, v2; + + v1 = *((label_freq_t*)e1); + v2 = *((label_freq_t*)e2); + + return - (v1.freq - v2.freq); + +} + +/* get the most common label in neighs (that is the list of the k + neighbours of a node) */ +unsigned int get_most_common_label(unsigned int *neighs, unsigned int k, + unsigned int *labels, unsigned int ref_label, + int *is_max){ + + static label_freq_t *neigh_labels = NULL; + static int size = 0; + int num, i, j, max_freq; + + if (size < k){ + size = k; + neigh_labels = realloc(neigh_labels, size * sizeof(label_freq_t)); + } + + neigh_labels[0].label = labels[neighs[0]]; + neigh_labels[0].freq = 1; + num = 1; + + for (i=1; i<k; i ++){ + for(j=0; j<num; j++){ + if (labels[neighs[i]] == neigh_labels[j].label) + break; + } + if (j == num){ /* new label */ + neigh_labels[j].label = labels[neighs[i]]; + neigh_labels[j].freq = 1; + num += 1; + } + else{/* the label already exists -> increase the counter */ + neigh_labels[j].freq += 1; + } + } + + /* Now we sort the array neigh_labels */ + qsort(neigh_labels, num, sizeof(label_freq_t), compare_label_freq_reverse); + + /* we determine how many neighbours have the maximum freq*/ + max_freq = neigh_labels[0].freq; + i = 1; + while(i < num && neigh_labels[i].freq == max_freq){ + i ++; + } + + /* check whether ref_label is one of the most common labels */ + *is_max = 0; + for (j=0; j<i; j++){ + if (neigh_labels[j].label == ref_label) + *is_max = 1; + } + /* now that we know that there are "i" max_freqs, let's select one + of them at random */ + j = rand() % i; + + return neigh_labels[j].label; +} + +/* reassign the labels so that communities are numbered from 1 to NC */ +int normalise_labels(unsigned int *labels, unsigned int N, + unsigned int *label_count){ + + unsigned int *label_map; + int i, j, num = 0; + + + label_map = malloc(N * sizeof(unsigned int)); + + label_map[0] = labels[0]; + labels[0] = 0; + label_count[0] = 1; + num = 1; + + for(i=1; i<N; i ++){ + for(j=0; j<num; j++){ + if (labels[i] == label_map[j]) + break; + } + if (j == num){ + label_map[j] = labels[i]; + label_count[j] = 0; + num +=1; + } + labels[i] = j; + label_count[j] += 1; + } + + free(label_map); + return num; +} + + +void dump_partition(unsigned int *labels, unsigned int *label_count, unsigned int N){ + + int i; + + for(i=0; i<N; i ++){ + fprintf(stdout, "%d %d\n", i, labels[i]);//, label_count[labels[i]]); + } +} + + +/* compute the modularity of the current partition */ + +double modularity(unsigned int *J_slap, unsigned int *r_slap, unsigned int N, + unsigned int K, unsigned int *comm, unsigned int NC){ + + double Q=0; + int i, j; + unsigned int c_i, c_j; + double *pmm, *am; + + pmm = malloc(NC * sizeof(double)); + am = malloc(NC * sizeof(double)); + + for (i=0; i<NC; i++){ + pmm[i] = am[i] = 0; + } + + for(i=0; i<N; i ++){ + c_i = comm[i]; + am[c_i] += degree(r_slap, i); + for(j=r_slap[i]; j<r_slap[i+1]; j++){ + c_j = comm[J_slap[j]]; + if ( c_j == c_i ){ + pmm[c_i] += 0.5; + } + } + } + Q = 0.0; + for(i=0; i < NC; i++){ + Q += (pmm[i]* 2.0 / K - pow((am[i] * 1.0 / K), 2)); + } + free(am); + free(pmm); + return Q; +} + + + +unsigned int* label_propagation(unsigned int *J_slap, unsigned int *r_slap, unsigned int N, + unsigned int K, unsigned int *num_epochs, + int max_epochs, char mode){ + + unsigned int *labels, *next_labels, *tmp_labels, *ids, tmp, new_label; + int i, epochs, j, k, cont, is_max; + long long int num_flips; + double Q; + + + + labels = malloc(N * sizeof(unsigned int)); + ids = malloc(N * sizeof(unsigned int)); + + if(mode == MODE_ASYNC){ + next_labels = labels; + } + else if (mode == MODE_SYNC){ + next_labels = malloc(N * sizeof(unsigned int)); + } + + /* We initialize the list of ids and labels */ + for (i=0; i<N; i ++){ + ids[i] = labels[i] = i; + } + + cont = 1; + epochs = 0; + while(cont){ + if (max_epochs > 0 && epochs > max_epochs) + break; + cont = 0; + if (epochs > 0){ + Q= modularity(J_slap, r_slap, N, K, labels, N); + fprintf(stderr, "%d %g %g\n", epochs, Q, (double)num_flips); + } + num_flips = 0; + + epochs += 1; + for (i=N-1; i>=0; i--){ + j = rand() % (i+1); + tmp = ids[j]; /* This is the id to be considered */ + + ids[j] = ids[i]; + ids[i] = tmp; + k = r_slap[tmp + 1] - r_slap[tmp]; + new_label = get_most_common_label(J_slap+r_slap[tmp], k,labels, labels[tmp], &is_max); + + /* Stop criterion: if the new label is not equal to the + old one, continue to another epoch */ + + if (mode == MODE_ASYNC && labels[tmp] != new_label){ + labels[tmp] = new_label; + cont = 1; + num_flips += 1 ; + } + if (mode == MODE_SYNC){ + next_labels[tmp] = new_label; + if (labels[tmp] != next_labels[tmp]){ + cont = 1; + num_flips += 1; + } + } + } + if (mode == MODE_SYNC){ + /* Now we can swap labels and next_labels */ + tmp_labels = labels; + labels = next_labels; + next_labels = tmp_labels; + } + } + free(ids); + *num_epochs = epochs - 1; + + if (mode == MODE_SYNC){ + free(next_labels); + } + + return labels; +} + + + + + + +int main(int argc, char *argv[]){ + + unsigned int N, K, nc; + unsigned int *J_slap, *r_slap, *labels, *label_count, num_epochs, max_epochs; + FILE *filein; + double Q; + char mode; + + + if (argc < 3){ + usage(argv); + exit(1); + } + + srand(time(NULL)); + + if (!strcmp(argv[1], "-")){ + /* take the input from STDIN */ + filein = stdin; + } + else { + filein = openfile_or_exit(argv[2], "r", 2); + } + + + read_slap(filein, &K, &N, &J_slap, &r_slap); + + fclose(filein); + + if (!my_strcasecmp(argv[1], "sync")){ + mode = MODE_SYNC; + } + else{ + mode = MODE_ASYNC; + } + + if (argc > 3) + max_epochs = atoi(argv[3]); + else + max_epochs = 0; + + labels = label_propagation(J_slap, r_slap, N, K, &num_epochs, max_epochs, mode); + label_count = malloc(N * sizeof(unsigned int)); + + nc = normalise_labels(labels, N, label_count); + + Q= modularity(J_slap, r_slap, N, K, labels, nc); + + printf("### nc: %d Q_max: %f Epochs: %d\n", nc, Q, num_epochs); + dump_partition(labels, label_count, N); + free(J_slap); + free(r_slap); + free(label_count); + free(labels); +} + + + |