/* Copyright 2020 Morten Movik and Christophe Crespelle */
/* This file is part of Edgecom. */
/* Edgecom is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. */
/* Edgecom is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. */
/* You should have received a copy of the GNU General Public License along with Edgecom. If not, see . */
/**********************************************************************************
* Name : Edgecom
* Authors: Morten Movik and Christophe Crespelle
* Mail : mortenmovik@gmail.com
* Purpose: Computing cmmunities of edges in an undirected graph
* Contributions: Morten Movik participated to the design of the program and wrote the main part of it. Christophe Crespelle participated to the design of the program and wrote part of it.
*********************************************************************************/
#include
#include
#include
#include
#include
#include
#include
#include
#include "prelim.c"
#include "rand.c"
char *IN_NAME = "./data/karate_right_numbers_converted";
//double MIN_MOD_INCREASE = 0.0;
char *OUTPUT_FILENAME = "output";
char *HISTORY_FILENAME = NULL;
bool RANDOM = false;
bool ONLY_NEIGHBOURS = false;
int MEASURE = -1;
void read_command_line_args(int argc, char **argv) {
int i;
for (i=1; i community (table of size n pointing to a table of size 2m, same as "links" for a graph)
int** edge_to_com;
} EdgeCommunities;
void free_EdgeCommunities(EdgeCommunities *com) {
free(com->nb_edge);
free(com->nb_node);
free(com->edge_list[0]);
free(com->edge_list);
free(com->node_list[0]);
free(com->node_list);
free(com->edge_to_com[0]);
free(com->edge_to_com);
}
typedef struct SuperPartition {
//number of supersets
int p;
// minimum free ID for a super set
int freeID;
// number of edges in each super set (table of size k with only p (non-consecutive) indices that are valid)
int* nb_edge;
// number of nodes in each super set (table of size k with only p (non-consecutive) indices that are valid)
int* nb_node;
// mapping community -> super set (table of size k)
int* com_to_sset;
} SuperPartition;
void free_SuperPartition(SuperPartition *spart) {
free(spart->nb_edge);
free(spart->nb_node);
free(spart->com_to_sset);
}
////////////////////////
// END : DATA STRUCTURES
////////////////////////
typedef struct Edge {
int dest;
int origin;
double weight;
} Edge;
Edge *node2edge;
int *rand_perm(int n){
int *perm;
int i, tmp, j;
if( (perm=(int *)malloc(n*sizeof(int))) == NULL )
printf("random_perm: malloc() error");
for (i=n-1;i>=0;i--)
perm[i] = i;
for (i=n-1;i>=0;i--){
j = random()%(i+1);
tmp = perm[i];
perm[i] = perm[j];
perm[j] = tmp;
}
return(perm);
}
void print_communities(const graph *g, EdgeCommunities * com, FILE* fout) {
int i;
int j;
for (i=0; ik; i++) {
fprintf(fout,"edges=%d, nodes=%d\n", com->nb_edge[i], com->nb_node[i]);
for (j=0; jnb_edge[i]; j++) {
fprintf(fout,"(%d,%d) ",com->edge_list[i][j].ori,g->links[com->edge_list[i][j].ori][com->edge_list[i][j].nei_num]);
}
fprintf(fout,"\n");
}
}
//////////////////////
// BEGIN : EXPECTATION
//////////////////////
int find_max_deg(const graph *g) {
int max_deg = 0;
int i;
for (i = 0; i < g->n; i++) {
if (g->degrees[i] > max_deg) {
max_deg = g->degrees[i];
}
}
return max_deg;
}
/** table with how many couples of each degreee-combination there are in g**/
int **get_S(const graph *g, int max_deg) {
int **S = calloc((max_deg + 1), sizeof *S);
int i;
int u;
int v;
for (i = 0; i <= max_deg; i++) {
S[i] = calloc((max_deg + 1), sizeof **S);
}
for (u = 0; u < g->n; u++) {
for (v = 0; v < g->n; v++) {
if (u == v) continue;
S[g->degrees[u]][g->degrees[v]] += 1;
}
}
/* couples between equal degree are counted twice in the table */
for (i = 0; i < max_deg + 1; i++) {
S[i][i] /= 2;
}
return S;
}
/** table with how many edges of each degreee-combination there are in g **/
int **get_T(const graph *g, int max_deg) {
int **T = calloc((max_deg + 1), sizeof *T);
int i;
int j;
int u;
int v;
for (i = 0; i <= max_deg; i++) {
T[i] = calloc((max_deg + 1), sizeof **T);
}
for (u = 0; u < g->n; u++) {
for (j = 0; j < g->degrees[u]; j++) {
v = g->links[u][j];
T[g->degrees[u]][g->degrees[v]] += 1;
}
}
for (i = 0; i < max_deg; i++) {
T[i][i] /= 2;
}
return T;
}
/** Probability that u and v is in V(C) of a community of size l,
* if there is an edge between u and v **/
long double Puv_edge(int l, int ku, int kv, int m) {
// works correctly for:
// calculated for hand Puv_edge(1, 1, 4, 6) = 0.16666666666666666
// calculated for hand Puv_edge(1, 2, 4, 6) = 0.225180
if (ku == 0 || kv == 0) return 0;
if (l == m) return 1;
long double Puv = 0;
Puv += pow(1.0 - (long double) l/(long double)m, ku + kv - 1);
Puv -= pow(1.0 - (long double) l/(long double)m, ku);
Puv -= pow(1.0 - (long double) l/(long double)m, kv);
Puv += 1.0;
return Puv;
}
/** Probability that u and v is in V(C) of a community of size l,
* if there is no edge between u and v **/
long double Puv_noedge(int l, int ku, int kv, int m) {
// works correctly for:
// calculated for hand Puv_noedge(1, 1, 4, 6) = 0.08629115226337448
if (ku == 0 || kv == 0) return 0;
if (l == m) return 1;
long double Puv = 0;
Puv += pow(1.0 - (long double) l/(long double)m, ku + kv);
Puv -= pow(1.0 - (long double) l/(long double)m, ku);
Puv -= pow(1.0 - (long double) l/(long double)m, kv);
Puv += 1.0;
return Puv;
}
long double expectation(int l, const graph *g, bool* calculated, long double* expectation_table) {
if (calculated[l]) return expectation_table[l];
int ku;
int kv;
int i;
int max_deg = find_max_deg(g);
int **S = get_S(g, max_deg);
int **T = get_T(g, max_deg);
long double expectation = 0;
for (ku = 0; ku <= max_deg; ku++) {
for (kv = 0; kv <= max_deg; kv++) {
if (ku > kv) continue;
long double edge = (long double) T[ku][kv];
long double noedge = (long double) S[ku][kv] - edge;
expectation += noedge * Puv_noedge(l, ku, kv, g->m);
expectation += edge * Puv_edge(l, ku, kv, g->m);
}
}
for (i = 0; i < max_deg; i++) {
free(S[i]);
free(T[i]);
}
free(S);
free(T);
expectation_table[l] = expectation;
calculated[l] = true;
return expectation;
}
//////////////////////
// END : EXPECTATION
//////////////////////
// create and initialise a super partition from a given partition into communities by putting each community alone in its super set
SuperPartition* init_superpart (const EdgeCommunities* com) {
int c;
SuperPartition* spart;
if( (spart=(SuperPartition *)malloc(sizeof(SuperPartition))) == NULL )
report_error("init_superpart: malloc() error");
spart->p = com->k;
spart->freeID = spart->p;
if( (spart->nb_edge=(int*)malloc(com->k * sizeof(int))) == NULL )
report_error("init_superpart: malloc() error");
if( (spart->nb_node=(int*)malloc(com->k * sizeof(int))) == NULL )
report_error("init_superpart: malloc() error");
if( (spart->com_to_sset=(int*)malloc(com->k * sizeof(int))) == NULL )
report_error("init_superpart: malloc() error");
for (c = 0; c < com->k; c++) {
spart->nb_edge[c] = com->nb_edge[c];
spart->nb_node[c] = com->nb_node[c];
spart->com_to_sset[c] = c;
}
return spart;
}
/// CHRIS
///////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////// UPDATE_COMMUNITIES ///////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////
/// IN: spart, g
/// IN/OUT: com, visited_nodes (comes back to its initial value at the end of the procedure)
/// OUT:
///////////////////////////////////////////////////////////////////////////////////////////////////
/// PRE-REQUISITE: all cells of visted nodes contain the value -1 and spart is a proper partition of the communities in com, which are communities of graph g
/// RESULT: update com by merging the communities belonging to the same part of the super partition spart
///////////////////////////////////////////////////////////////////////////////////////////////////
void update_communities(EdgeCommunities* com, const SuperPartition* spart, const graph* g, int* visited_nodes) {
int l;
int i,j;
int u,v;
LocalEdge** new_edge_list;
int* cur_edge;
int* nb_com;
int** com_list;
int* cur_com_list;
int** new_node_list;
// build a table new_com of mapping from old communities to new comunity number from 0 to p-1
// update com->nb_edge et com->nb_node (old values are lost)
int* new_com;
int* new_nb_edge;
int* new_nb_node;
int cur_com;
if( (new_com=(int *)malloc(com->k*sizeof(int))) == NULL )
report_error("update_communities: malloc() error");
if( (new_nb_edge=(int *)malloc(spart->p*sizeof(int))) == NULL )
report_error("update_communities: malloc() error");
if( (new_nb_node=(int *)malloc(spart->p*sizeof(int))) == NULL )
report_error("update_communities: malloc() error");
cur_com = 0;
for (i=0; ik; i++) {
if (spart->nb_edge[i]!=-1) {
new_com[i] = cur_com;
new_nb_edge[cur_com]=spart->nb_edge[i];
new_nb_node[cur_com]=spart->nb_node[i];
cur_com++;
}
else
new_com[i] = -1;
}
if (cur_com != (spart->p)) report_error("update_communities: incoherence with p");
// update com->edge_to_com
for (u=0; un; u++) {
for (v=0; vdegrees[u]; v++) {
com->edge_to_com[u][v] = new_com[spart->com_to_sset[com->edge_to_com[u][v]]];
}
}
// update com->edge_list
if( (new_edge_list=(LocalEdge**)malloc(spart->p*sizeof(LocalEdge*))) == NULL )
report_error("update_communities: malloc() error");
if( (new_edge_list[0]=(LocalEdge*)malloc(g->m*sizeof(LocalEdge))) == NULL )
report_error("update_communities: malloc() error");
for (i=1; ip; i++) {
new_edge_list[i] = new_edge_list[i-1]+new_nb_edge[i-1];
}
if( (cur_edge=(int *)malloc(spart->p*sizeof(int))) == NULL )
report_error("update_communities: malloc() error");
for (i=0; ip; i++) cur_edge[i] = 0;
for (i=0; ik; i++) {
for (j=0; jnb_edge[i]; j++) {
new_edge_list[new_com[spart->com_to_sset[i]]][cur_edge[new_com[spart->com_to_sset[i]]]+j]=com->edge_list[i][j];
}
cur_edge[new_com[spart->com_to_sset[i]]] += com->nb_edge[i];
}
for (i=0; ip; i++) {
if (cur_edge[i] != new_nb_edge[i]) report_error("update_communities: incoherence in new_edge_list");
}
free(cur_edge);
// update com->node_list
if( (nb_com=(int*)malloc(spart->p*sizeof(int))) == NULL )
report_error("update_communities: malloc() error");
for (i=0; ip; i++) {
nb_com[i]=0;
}
for (i=0; ik; i++) {
nb_com[new_com[spart->com_to_sset[i]]]++;
}
if( (com_list=(int**)malloc(spart->p*sizeof(int*))) == NULL )
report_error("update_communities: malloc() error");
if( (com_list[0]=(int*)malloc(com->k*sizeof(int))) == NULL )
report_error("update_communities: malloc() error");
for (i=1; ip; i++) {
com_list[i] = com_list[i-1]+nb_com[i-1];
}
if( (cur_com_list=(int*)malloc(spart->p*sizeof(int))) == NULL )
report_error("update_communities: malloc() error");
for (i=0; ip; i++) {
cur_com_list[i]=0;
}
for (i=0; ik; i++) {
com_list[new_com[spart->com_to_sset[i]]][cur_com_list[new_com[spart->com_to_sset[i]]]]=i;
cur_com_list[new_com[spart->com_to_sset[i]]]++;
}
for (i=0; ip; i++) {
if (cur_com_list[i] != nb_com[i]) report_error("update_communities: incoherence in com_list");
}
free(cur_com_list);
if( (new_node_list=(int**)malloc(spart->p*sizeof(int*))) == NULL )
report_error("update_communities: malloc() error");
if( (new_node_list[0]=(int*)malloc(2*g->m*sizeof(int))) == NULL )
report_error("update_communities: malloc() error");
for (i=1; ip; i++) {
new_node_list[i] = new_node_list[i-1]+new_nb_node[i-1];
}
int cur_merge;
for (i=0; ip; i++) {
cur_merge = 0;
for (j=0; jnb_edge[com_list[i][j]]; l++) {
u=com->edge_list[com_list[i][j]][l].ori;
v=g->links[u][com->edge_list[com_list[i][j]][l].nei_num];
if (visited_nodes[u] == -1) {
visited_nodes[u]=1;
new_node_list[i][cur_merge]=u;
cur_merge++;
}
if (visited_nodes[v] == -1) {
visited_nodes[v]=1;
new_node_list[i][cur_merge]=v;
cur_merge++;
}
}
}
if (cur_merge != new_nb_node[i]) report_error("update_communities: incoherence in node_list");
//reset visited_nodes
for (j=0; jnode_list[0]);
free(com->node_list);
com->node_list=new_node_list;
// update com->k
com->k = spart->p;
// update com->nb_edge and com->nb_node
free(com->nb_edge);
free(com->nb_node);
com->nb_edge=new_nb_edge;
com->nb_node=new_nb_node;
//update com->edge_list
free(com->edge_list[0]);
free(com->edge_list);
com->edge_list = new_edge_list;
//free memory
free(new_com);
free(nb_com);
free(com_list[0]);
free(com_list);
}
/// CHRIS
///////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////// NODE_DIFF /////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////
/// IN:
/// IN/OUT:
/// OUT:
///////////////////////////////////////////////////////////////////////////////////////////////////
/// PRE-REQUISITE:
/// RESULT: for c a community not in super set sset, return the number of nodes of c that are not in sset
///////////////////////////////////////////////////////////////////////////////////////////////////
int node_diff (const int c, const int sset, const EdgeCommunities* com, const SuperPartition* spart, const graph* g) {
int diff = 0;
int u;
bool in_sset;
int i,j;
for (i = 0; i < com->nb_node[c] ; i++) {
u = com->node_list[c][i];
in_sset = false;
j = 0;
while ((j < g->degrees[u] ) && !in_sset) {
if (spart->com_to_sset[com->edge_to_com[u][j]]==sset) in_sset = true;
j++;
}
if (!in_sset) diff++;
}
return diff;
}
int count_couples(EdgeCommunities *com) {
int couples = 0;
int c;
for (c = 0; c < com->k; c++) {
couples += (com->nb_node[c] * (com->nb_node[c] - 1))/2;
}
return couples;
}
long double calculate_expectation(const graph *g, EdgeCommunities *com, bool *calculated, long double * expectation_table) {
long double expect = 0;
int c;
for (c = 0; c < com->k; c++) {
expect += expectation(com->nb_edge[c], g, calculated, expectation_table);
}
return expect;
}
// returns modularity of edge-partition, using the formula:
// Q = |E|*( 1/couples(partition) - 1/E(couples(partition)) )
long double edge_modularity(const graph *g, EdgeCommunities *partition, bool* calculated, long double* expectation_table) {
int couples = count_couples(partition);
long double q1 = ((long double) g->m) / ((long double) couples);
long double expect = calculate_expectation(g, partition, calculated, expectation_table);
long double q2 = ((long double) g->m) / expect;
return q1 - q2;
}
// returns modularity of edge-partition, using the formula:
// Q = |E|*( couples(partition) - E(couples(partition)) )
long double edge_mod_minus(const graph *g, EdgeCommunities *partition, bool* calculated, long double* expectation_table) {
int couples = count_couples(partition);
long double expect = calculate_expectation(g, partition, calculated, expectation_table);
return (expect - (long double)couples);
}
// returns modularity of edge-partition, using the formula:
// Q = |E|*( E(couples(partition))/couples(partition))
long double edge_mod_ratio(const graph *g, EdgeCommunities *partition, bool* calculated, long double* expectation_table) {
int couples = count_couples(partition);
long double expect = calculate_expectation(g, partition, calculated, expectation_table);
return expect / couples;
}
// gain in modularity for putting community c into sset
long double edge_mod_gain(int c, int sset, int couples_in_spart, long double expect_spart, EdgeCommunities * com, SuperPartition *spart, const graph *g, bool* calculated, long double* expectation_table) {
if (expect_spart <= 0.0)
report_error("The expected number of couples in the partition must be positive.\n");
if (couples_in_spart <= 0)
report_error("the number of couples in the superpartition must be positive.\n");
// COUPLES
int diff_nodes = node_diff(c, sset, com, spart, g);
int nodes_sset = spart->nb_node[sset];
int nodes_c = com->nb_node[c];
int couples_after = couples_in_spart;
couples_after -= (spart->nb_node[sset] * (spart->nb_node[sset] - 1)) / 2;
couples_after -= (nodes_c * (nodes_c - 1)) / 2;
couples_after += ((nodes_sset + diff_nodes) * (nodes_sset + diff_nodes - 1)) / 2;
// EXPECTED COUPLES
long double expectation_after = expect_spart;
expectation_after -= expectation(com->nb_edge[c], g, calculated, expectation_table);
expectation_after -= expectation(spart->nb_edge[sset], g, calculated, expectation_table);
expectation_after += expectation(com->nb_edge[c] + spart->nb_edge[sset], g, calculated, expectation_table);
// DELTA MODULARITY
// modularity is mod after merge, minus mod before
long double mod;
if (MEASURE == 1) {
// with first idea of modularity
mod = (long double) g->m / ((long double) (couples_after)) - (long double) g->m / ((long double) (expectation_after));
mod -= (long double) g->m / ((long double) couples_in_spart) - (long double) g->m / ((long double) expect_spart);
} else if (MEASURE == 2) {
// with second idea of modularity
mod = ((long double) (couples_after)) - ((long double) (expectation_after));
mod -= ((long double) couples_in_spart) - ((long double) expect_spart);
mod = -mod;
} else if (MEASURE == 3) {
// with third idea of modularity
mod = ((long double) (expectation_after)) / ((long double) (couples_after)) ;
mod -= ((long double) expect_spart) / ((long double) couples_in_spart);
} else report_error("measure must be given with -m option\n");
return mod;
}
// Returns sorted adj-list
int** sort_adj_list(graph *g) {
int i, u, j;
//allocate memory for new adjacency list
int **adj = (int**) calloc(g->n,sizeof(int*));
adj[0] = (int*) calloc(2*g->m, sizeof(int));
for (i = 1; i < g->n; i++) {
adj[i] = adj[i-1] + g->degrees[i-1];
}
int *indices = (int*) calloc(g->n, sizeof(int));
for (u = 0; u < g->n; u++) {
for (j = 0; j < g->degrees[u]; j++) {
int v = g->links[u][j];
adj[v][indices[v]++] = u;
}
}
free(indices);
return adj;
}
void init_edge_communities(const graph *g, EdgeCommunities* partition) {
int i;
partition->k = g->m;
//Allocate nb_edge, nb_node
if( (partition->nb_edge=(int *)malloc(partition->k*sizeof(int))) == NULL )
report_error("init_edge_communities: malloc() error");
for (i = 0; i < partition->k; i++) {
partition->nb_edge[i] = 1;
}
if( (partition->nb_node=(int *)malloc(partition->k*sizeof(int))) == NULL )
report_error("init_edge_communities: malloc() error");
for (i = 0; i < partition->k; i++) {
partition->nb_node[i] = 2;
}
//Allocate edge_list
if( (partition->edge_list=(LocalEdge **)malloc(partition->k*sizeof(LocalEdge*))) == NULL )
report_error("init_edge_communities: malloc() error");
if( (partition->edge_list[0]=(LocalEdge *)malloc(g->m * sizeof(LocalEdge))) == NULL )
report_error("init_edge_communities: malloc() error");
for (i = 1; i < partition->k; i++) {
partition->edge_list[i] = partition->edge_list[i-1] + 1;
}
//Allocate node_list
if( (partition->node_list=(int **)malloc(partition->k*sizeof(int*))) == NULL )
report_error("init_edge_communities: malloc() error");
if( (partition->node_list[0]=(int *)malloc(2*g->m * sizeof(int))) == NULL )
report_error("init_edge_communities: malloc() error");
for (i = 1; i < partition->k; i++) {
partition->node_list[i] = partition->node_list[i-1] + 2;
}
//Allocate edge_to_com
if( (partition->edge_to_com=(int **)malloc(g->n*sizeof(int*))) == NULL )
report_error("init_edge_communities: malloc() error");
if( (partition->edge_to_com[0]=(int *)malloc(2*g->m * sizeof(int))) == NULL )
report_error("init_edge_communities: malloc() error");
for (i = 1; i < g->n; i++) {
partition->edge_to_com[i] = partition->edge_to_com[i-1] + g->degrees[i-1];
}
// Fill edge_list, node_list, and edge_to_com
//int **sorted_adj = sort_adj_list(g);
int* indices;
if( (indices=(int *)malloc(g->n*sizeof(int))) == NULL )
report_error("init_edge_communities: malloc() error");
for (i = 0; i < g->n; i++) indices[i]=0;
int u,j,l;
int com = 0;
for (u = 0; u < g->n; u++) {
for (j = 0; j < g->degrees[u]; j++) {
int v = g->links[u][j];
if (u < v) {
// fill edge_list
partition->edge_list[com][0].ori = u;
partition->edge_list[com][0].nei_num = j;
// fill node_list
partition->node_list[com][0] = u;
partition->node_list[com][1] = v;
// fill edge_to_com
partition->edge_to_com[u][j] = com;
com++;
}
else {
l=0;
while (g->links[v][l]!=u) l++;
partition->edge_to_com[u][j] = partition->edge_to_com[v][l];
}
}
}
free(indices);
}
// Remove c from it's superset and put it in a new superset freeID. Do not update freeID
void remove_com(int c, EdgeCommunities *com, SuperPartition *spart, const graph *g) {
int sset = spart->com_to_sset[c];
if (com->nb_edge[c] != spart->nb_edge[sset]) {
spart->p++;
spart->com_to_sset[c] = spart->freeID;
spart->nb_edge[sset] -= com->nb_edge[c];
spart->nb_node[sset] -= node_diff(c, sset, com, spart, g);
spart->nb_edge[spart->com_to_sset[c]] = com->nb_edge[c];
spart->nb_node[spart->com_to_sset[c]] = com->nb_node[c];
if (spart->nb_node[sset] <= 0 || spart->nb_edge[sset] <= 0)
report_error("a sset ended up with a non-positive number of nodes or edges after moving a community out of it");
}
}
// move c from its current super set (local variable ori_sset) to dest_sset
void move(int c, int dest_sset, EdgeCommunities *com, SuperPartition *spart, const graph *g) {
int diff_nodes_ori;
int diff_nodes_dest = node_diff(c, dest_sset, com, spart, g);
int ori_sset = spart->com_to_sset[c];
spart->nb_edge[dest_sset] += com->nb_edge[c];
spart->nb_node[dest_sset] += diff_nodes_dest;
spart->com_to_sset[c] = dest_sset;
if (spart->nb_edge[ori_sset] == com->nb_edge[c]) {
spart->nb_edge[ori_sset] = -1;
spart->nb_node[ori_sset] = -1;
spart->p -= 1;
if (ori_sset < spart->freeID) spart->freeID = ori_sset;
}
else {
diff_nodes_ori = node_diff(c, ori_sset, com, spart, g);
spart->nb_edge[ori_sset] -= com->nb_edge[c];
spart->nb_node[ori_sset] -= diff_nodes_ori;
}
}
// take a partition into edge communities and group some communities together to obtain a superpartition
// return true if there is an improvement
bool one_level_edge(const graph *g, EdgeCommunities* com, SuperPartition *spart, bool* calculated, long double* expectation_table) {
int c;
/////////// initialize couples and expectation /////////////////
// remember to update this for every insert/remove:
int couples_in_spart = 0;
long double expect_spart = 0.0;
int diff_nodes;
int nodes_sset;
// all communities in different super sets
for (c = 0; c < com->k; c++) {
couples_in_spart += (com->nb_node[c] * (com->nb_node[c] - 1)) / 2;
expect_spart += expectation(com->nb_edge[c], g, calculated, expectation_table);
}
bool improved_this_turn;
bool overall_improvement = false;
long double best_mod_gain;
long double gain_comeback;
int inter_sset;
int *random_order;
int round = 0;
int h, c2;
do {
round++;
if (RANDOM) {
random_order = rand_perm(com->k);
} else {
random_order = (int*) malloc(com->k*sizeof(int));
for (int i = 0; i < com->k; i++) {
random_order[i] = i;
}
}
improved_this_turn = false;
////////////// Treat community c //////////////////
for (h = 0; h < com->k; h++) {
int c = random_order[h];
int old_sset = spart->com_to_sset[c];
/////////// Put c in a sset by itself /////////////////
remove_com(c, com, spart, g);
inter_sset = spart->com_to_sset[c];
// update if we moved c:
if (spart->com_to_sset[c] != old_sset) {
// update expectation:
expect_spart -= expectation(com->nb_edge[c] + spart->nb_edge[old_sset], g, calculated, expectation_table);
expect_spart += expectation(spart->nb_edge[old_sset], g, calculated, expectation_table);
expect_spart += expectation(com->nb_edge[c], g, calculated, expectation_table);
// update couples:
diff_nodes = node_diff(c, old_sset, com, spart, g);
nodes_sset = spart->nb_node[old_sset];
couples_in_spart -= ((nodes_sset + diff_nodes) * (nodes_sset + diff_nodes - 1)) / 2;
couples_in_spart += (nodes_sset * (nodes_sset - 1)) / 2;
couples_in_spart += (com->nb_node[c] * (com->nb_node[c] - 1)) / 2;
}
/////////////// which sset do we insert c into? //////////////////
int best_sset = -1;
best_mod_gain = -LDBL_MAX;
if (spart->com_to_sset[c] == old_sset) gain_comeback = 0.0;
for (c2 = 0; c2 < com->k; c2++) {
int new_sset;
if (c2 != c) {
new_sset = spart->com_to_sset[c2];
if (ONLY_NEIGHBOURS && node_diff(c, new_sset, com, spart, g) == com->nb_node[c]){
continue; //don't move if no links are shared between V(c) and V(new_sset)
}
long double gain = 0.0;
gain = edge_mod_gain(c, new_sset, couples_in_spart, expect_spart, com, spart, g, calculated, expectation_table);
if (new_sset == old_sset) gain_comeback = gain;
if (gain > best_mod_gain) {
best_mod_gain = gain;
best_sset = new_sset;
}
}
}
if (best_mod_gain < 0.0) {
best_mod_gain = 0.0;
best_sset = spart->com_to_sset[c];
}
if (best_mod_gain > gain_comeback) {
improved_this_turn = true;
overall_improvement = true;
}
else {
best_sset = old_sset;
}
//////////////////// insert c into best community or keep intermediate ////////////////
if (best_sset == inter_sset) {
if (inter_sset == spart->freeID) {
while (spart->nb_node[spart->freeID] != -1) spart->freeID++;
}
else {
}
} else {
// insert into new community
// update expectation
expect_spart -= expectation(com->nb_edge[c], g, calculated, expectation_table);
expect_spart -= expectation(spart->nb_edge[best_sset], g, calculated, expectation_table);
expect_spart += expectation(spart->nb_edge[best_sset] + com->nb_edge[c], g, calculated, expectation_table);
// update couples
diff_nodes = node_diff(c, best_sset, com, spart, g);
nodes_sset = spart->nb_node[best_sset];
couples_in_spart -= (spart->nb_node[best_sset] * (spart->nb_node[best_sset] - 1)) / 2;
couples_in_spart -= (com->nb_node[c] * (com->nb_node[c] - 1)) / 2;
couples_in_spart += ((nodes_sset + diff_nodes) * (nodes_sset + diff_nodes - 1)) / 2;
// insert c into the best sset:
move(c, best_sset, com, spart, g);
// sset with freeID is now free again
if ((spart->nb_edge[spart->freeID] != 0 && spart->nb_edge[spart->freeID] != -1)
|| (spart->nb_node[spart->freeID] != 0 && spart->nb_node[spart->freeID] != 0))
spart->nb_node[spart->freeID] = -1;
spart->nb_edge[spart->freeID] = -1;
}
}
free(random_order);
} while (improved_this_turn);
return overall_improvement;
}
// write output of algorithm to file out.
void output(FILE *out, EdgeCommunities *com, const graph *g, bool *calculated, long double *expectation_table) {
int couples = count_couples(com);
long double expected = calculate_expectation(g, com, calculated, expectation_table);
if (MEASURE == 1) fprintf(out, "Modularity used: m/r - m/E(r)\n");
else if (MEASURE == 2) fprintf(out, "Modularity used: (E(r) - r)\n");
else if (MEASURE == 3) fprintf(out, "Modularity used: (E(r)/r)\n");
else report_error("measure must be given with -m option");
fprintf(out, "Modularity m/r - m/E(r): %Lf\n", edge_modularity(g, com, calculated, expectation_table));
fprintf(out, "Modularity (E(r) - r): %Lf\n", edge_mod_minus(g, com, calculated, expectation_table));
fprintf(out, "Modularity (E(r)/r) = %Lf\n", edge_mod_ratio(g, com, calculated, expectation_table));
fprintf(out, "couples: %d\n", couples);
fprintf(out, "expectation: %Lf\n", expected);
fprintf(out, "# of communities: %d\n", com->k);
print_communities(g, com, out);
}
// LOUVAIN FOR EDGES, MAIN FUNCTION
EdgeCommunities* edge_louvain(const graph *g, bool* calculated, long double * expectation_table) {
clock_t start_at = clock();
int i;
bool improved = true;
int* visited_nodes;
EdgeCommunities* com;
SuperPartition* spart;
if( (visited_nodes=(int*)malloc(g->n*sizeof(int))) == NULL )
report_error("main: malloc() error");
for (i=0; in; i++) visited_nodes[i]=-1;
if( (com=(EdgeCommunities *)malloc(sizeof(EdgeCommunities))) == NULL )
report_error("main: malloc() error");
// initialize partition
init_edge_communities(g, com);
spart = init_superpart(com);
// prepare output files
FILE *out = NULL;
if ( (out=fopen(OUTPUT_FILENAME,"w"))==NULL)
perror("fopen");
FILE *history = NULL;
if (HISTORY_FILENAME) {
if ( (history=fopen(HISTORY_FILENAME,"w"))==NULL)
perror("fopen");
}
i=0;
while (improved) {
i++;
improved = one_level_edge(g, com, spart, calculated, expectation_table);
update_communities(com, spart, g, visited_nodes);
free_SuperPartition(spart);
spart = init_superpart(com);
if (HISTORY_FILENAME) {
fprintf(history, "after STAGE %d\n", i);
output(history, com, g, calculated, expectation_table);
}
}
double elapsed = ((double) (clock() - start_at)) / CLOCKS_PER_SEC;
int elapsed_hour = elapsed / (60*60);
int rest_min = elapsed/60 - elapsed_hour*60;
int rest_sec = elapsed - elapsed_hour*60*60 - rest_min*60;
fprintf(out, "elapsed time: %d hour, %d min, %d sec, after final stage (%d):\n", elapsed_hour, rest_min, rest_sec, i);
output(out, com, g, calculated, expectation_table);
fclose(out);
if (HISTORY_FILENAME) fclose(history);
free(visited_nodes);
free_SuperPartition(spart);
return com;
}
///////////////////////////
//// MAIN ////
///////////////////////////
int main(int argc, char **argv) {
int i;
// command line arguments
read_command_line_args(argc, argv);
if (RANDOM) {
srand(time(NULL));
} else srand((unsigned) 102458);
FILE* infile=NULL;
graph* g=NULL;
// Create graph
if ( (infile=fopen(IN_NAME,"r"))==NULL)
report_error("IN_NAME -- fopen: error");
g = graph_from_file(infile);
fclose(infile);
long double *expectation_table;
bool *calculated;
if( (expectation_table=(long double *)malloc((g->m+1)*sizeof(long double))) == NULL )
report_error("main: malloc() error");
for (i=0; im+1; i++) expectation_table[i] = -1.0;
if( (calculated=(bool *)malloc(g->m+1*sizeof(bool))) == NULL )
report_error("main: malloc() error");
for (i=0; im; i++) calculated[i] = false;
EdgeCommunities* com;
com=edge_louvain(g, calculated, expectation_table);
free_EdgeCommunities(com);
return 0;
}