%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics.pairwise import pairwise_distances
nodes_labels = pd.read_csv("celegans277labels.csv", header=None)
adjacency = pd.read_csv("celegans277matrix.csv", header=None)
locations = pd.read_csv("celegans277positions.csv", header=None)
adjacency.index = nodes_labels[0]
adjacency.columns = nodes_labels[0]
locations.index = nodes_labels[0]
inter_neuron_dist = pd.DataFrame(pairwise_distances(locations))
inter_neuron_dist.columns = nodes_labels[0]
inter_neuron_dist.index = nodes_labels[0]
inter_neuron_dist.head()
locations.head()
adjacency.head()
def plot_connectome_scale():
plt.figure(figsize=(14,1))
plt.plot(locations.values[:,0],locations.values[:,1],"o",alpha=0.4)
#plt.savefig("connectome_scale.pdf")
def plot_connectome():
sns.set_style("whitegrid")
plt.figure(figsize=(20,12))
for i,row in enumerate(adjacency.values):
for j,columns in enumerate(row):
if columns == 1:
xs = [locations.values[i,0],locations.values[j,0]]
ys = [locations.values[i,1],locations.values[j,1]]
plt.plot(xs,ys,color="red",alpha=0.03)
plt.plot(locations.values[:,0],locations.values[:,1],"o",alpha=0.4)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
plt.xlabel("Posterior - Anterior(mm)",fontsize=30)
plt.ylabel("Dorsal - Ventral(mm)",fontsize=30)
#plt.savefig("connectome.pdf")
#plt.figure(figsize=(14,1))
#plt.plot(locations.values[:,0],locations.values[:,1],"o",alpha=0.4)
#plt.savefig("connectome_scale.pdf")
plot_connectome()
plot_connectome_scale()
plt.imshow(adjacency)
Pas de symétrie, peu de point blancs $\to$ matrice creuse
in_degree = adjacency.sum(axis=0)
out_degree = adjacency.sum(axis=1)
plt.hist(in_degree, alpha=0.4,bins=20) #blue
plt.hist(out_degree, alpha=0.4,bins=20) #orange
plt.xlabel("degree")
plt.ylabel("frequency")
link_list=[]
for row in adjacency.index:
for col in adjacency.columns:
if adjacency.loc[row,col]!=0:
link_list.append([row,col, inter_neuron_dist.loc[row,col]])
len(link_list)
df = pd.DataFrame(link_list, columns=["input", "output", "weight"])
df
import networkx as nx
g = nx.from_pandas_edgelist(df, "input", "output", "weight")
nx.draw(g)
g2 = nx.from_pandas_edgelist(df, "input", "output", "weight", create_using=nx.DiGraph())
nx.draw(g2)
list(g2.nodes())[:10]
list(g2.edges())[:10]
list(g2.neighbors("ADAL"))
g2.get_edge_data("ADAL", "ADFL")
A network motif can be defined as a subgraph (ie. small patterns of interconnections) that repeat itself in a given graph. They are occurring in complex networks at numbers that are significantly higher than those in randomized networks.
feed forward loop, bi-fan, bi-parallel
Feed forward loop
def feed_forward(g):
res = 0
for n in g.nodes():
for i in g.neighbors(n):
for j in g.neighbors(i):
if j in g.neighbors(n):
res += 1
return res
def feed_forward(g):
res = 0
for n in g.nodes():
n1 = list(g.neighbors(n))
for i in n1:
n2 = list(g.neighbors(i))
res += len(set(n2).intersection(set(n1)))
return res
feed_forward(g2)
Ici, on recherche le pattern mais pas de manière exclusive, ie on ne se préoccupe pas du fait que les noeuds du pattern peuvent être connectés à d autre noeuds
Bi-fan loop
from scipy.special import binom
def bi_fan(g):
counts = 0
nodes = list(g.nodes())
for i,n1 in enumerate(nodes):
for n2 in nodes[i+1:]:
neighbors_n1 = set(list(g.neighbors(n1))).difference([n1,n2])
neighbors_n2 = set(list(g.neighbors(n2))).difference([n1,n2])
nb_common_neighbors = len(neighbors_n1.intersection(neighbors_n2))
counts += binom(nb_common_neighbors,2)
return(counts)
def bi_fan(g):
res = 0
i = 0
j = 0
nodes = list(g.nodes())
for i,n1 in enumerate(nodes): #(ie a on figure 6)
neighbors_n1 = list(g.neighbors(n1))
for n2 in nodes[i+1:]: #(ie c on figure 6)
neighbors_n2 = list(g.neighbors(n2))
for j,v1 in enumerate(neighbors_n1): #(ie b on figure 6)
for v2 in neighbors_n1[j+1:]: #(ie d on figure 6)
if ((v1 in neighbors_n2) and (v2 in neighbors_n2)):
res += 1
return res
bi_fan(g2)
def bi_parallel(g):
counts = 0
nodes = list(g.nodes())
for n1 in nodes:
for n2 in nodes:
if n1 != n2:
children_n1 = set(g.successors(n1)).difference([n1,n2])
parents_n2 = set(g.predecessors(n2)).difference([n1,n2])
nb_common_parents_children = len(children_n1.intersection(parents_n2))
counts += binom(nb_common_parents_children,2)
return(counts)
def bi_parallel(g):
res = 0
i = 0
for n in g.nodes(): # (ie a on figure 6)
neighbors_n = list(g.neighbors(n))
for i,v1 in enumerate(neighbors_n): #(ie b on figure 6)
for v2 in neighbors_n[i+1:]: #(ie c on figure 6)
for w in list(g.neighbors(v1)):
if w in list(g.neighbors(v2)):
if w != v1 and w != v2 and w != n:
res += 1
return res
bi_parallel(g2)
For a integer $k$, the rich-club coefficient is equal to the number of edges $|E_k|$ between nodes $V_k = \{v \in V \; | \; deg(v)\geq k\}$, divided by the number of potential edges between nodes $V_k$:
$$\phi(k) = \frac{2 |E_k|}{|V_k|(|V_k| - 1)}$$The networkx
implementation allows to normalize the coefficient by the value $\phi_{rand}(k)$ obtained for randomized versions of the network.
from networkx.algorithms import rich_club_coefficient
rcc = rich_club_coefficient(g2.to_undirected(),normalized=True)
rcc = pd.Series(rcc)
plt.plot(rcc)
plt.xlabel("$k$")
plt.ylabel("$\phi(k)/\phi_{rand}(k)$")
rcc[rcc>1.2]
Here when $k=35$ we have $\phi(k) >1.2$, thus our rich club here contains all the nodes with a degree larger than 35
undirected_degrees = pd.Series(dict(g.degree()))
rich_club = list(undirected_degrees[undirected_degrees>35].index)
perif = list(set(g.nodes).difference(rich_club))
g_rich = g.subgraph(rich_club)
g_perif = g.subgraph(perif)
rich_club
rich_neurons_locations = locations.loc[rich_club]
plot_connectome()
plt.plot(rich_neurons_locations[0],rich_neurons_locations[1],"*k",markersize=7)
The connection distance is simply the 2D Euclidean distance between the connected neurons, it is an approximation of the axonal connection distance (a.k.a. wiring cost).
def compute_pairwise_measure(G,measure):
res = []
nodes = list(G.nodes)
for i, u in enumerate(nodes[:-1]):
for j,v in enumerate(nodes[i+1:]):
try:
res.append(measure(G,u,v))
except:
pass
return(res)
from networkx.algorithms import average_shortest_path_length
average_shortest_path_length(g,"weight"),average_shortest_path_length(g)
average_shortest_path_length(g_rich,"weight"),average_shortest_path_length(g_rich)
from networkx.algorithms import connected_components
for g_p in connected_components(g_perif):
print(len(g_p))
print(average_shortest_path_length(g.subgraph(g_p),"weight"),
average_shortest_path_length(g.subgraph(g_p)))
from networkx.algorithms.shortest_paths import shortest_path_length
def shortest_path_length_(G,u,v):
return(shortest_path_length(G,u,v,weight="weight"))
shortest_path_rich = compute_pairwise_measure(g_rich,shortest_path_length_)
shortest_path_perif = compute_pairwise_measure(g_perif,shortest_path_length_)
shortest_path_g = compute_pairwise_measure(g,shortest_path_length_)
lengths = shortest_path_rich+shortest_path_perif+shortest_path_g
ids = ["rich" for i in shortest_path_rich]+\
["graph" for i in shortest_path_g]+\
["perif" for i in shortest_path_perif]
df = pd.DataFrame({"length":lengths,"network":ids})
sns.boxplot(data=df,x="network",y="length")
The global efficiency $F$ of a network $G = \langle V,E \rangle$ is the the average of the inverse of the shortest paths distances between nodes.
$F(G) = \frac{1}{|V|(|V|-1)} \times \sum_{i\neq j \in V} \frac{1}{d(i,j)}$
The local efficiency $f$ of a graph is simply the average of the global efficiency of the subgraph induced by the neighbors of node $v$, for each node $v\in V$.
from networkx.algorithms import global_efficiency,local_efficiency
loc_eff_rich = local_efficiency(g_rich)
loc_eff_perif = local_efficiency(g_perif)
loc_eff_g = local_efficiency(g)
glob_eff_rich = global_efficiency(g_rich)
glob_eff_perif = global_efficiency(g_perif)
glob_eff_g = global_efficiency(g)
loc_eff_rich,loc_eff_perif,loc_eff_g
glob_eff_rich,glob_eff_perif,glob_eff_g
from networkx.algorithms import efficiency
eff_rich = compute_pairwise_measure(g_rich,efficiency)
eff_perif = compute_pairwise_measure(g_perif,efficiency)
eff_g = compute_pairwise_measure(g,efficiency)
effs = eff_rich+eff_perif+eff_g
ids = ["rich" for i in eff_rich]+\
["graph" for i in eff_g]+\
["perif" for i in eff_perif]
df = pd.DataFrame({"efficiency":effs,"network":ids})
sns.boxplot(data=df,x="network",y="efficiency")
The betweenness centrality measures the importance/centrality of node/edge in a graph, by computing the proportion of shortest paths between any pair of nodes that pass through the given node/edge
from networkx.algorithms.centrality import betweenness_centrality
bc_g = pd.Series(betweenness_centrality(g))
bc_rich = bc_g[rich_club]
bc_perif = bc_g[perif]
bc = list(bc_rich)+list(bc_g)+list(bc_perif)
ids = ["rich" for i in bc_rich]+\
["graph" for i in bc_g]+\
["perif" for i in bc_perif]
df = pd.DataFrame({"betweenness centrality":bc,"network":ids})
sns.boxplot(data=df,x="network",y="betweenness centrality")
plt.ylim([0,0.1])
def partition2series(partition):
nodes = set()
for p in partition:
nodes = nodes.union(p)
nodes = list(nodes)
C = pd.Series(0,index=nodes)
for i,c in enumerate(partition):
if len(c)>1:
C[c] = i+1
C = C.astype(str)
return(C)
def series2partition(C):
gb = C.groupby(C.values)
partition = []
for x in gb.groups:
partition.append(tuple(gb.get_group(x).index))
return(partition)
def plot_connectome_communities(C):
sns.set_style("whitegrid")
plt.figure(figsize=(20,12))
for i,row in enumerate(adjacency.values):
for j,columns in enumerate(row):
if columns == 1:
xs = [locations.values[i,0],locations.values[j,0]]
ys = [locations.values[i,1],locations.values[j,1]]
plt.plot(xs,ys,color="red",alpha=0.01)
sns.scatterplot(locations[0],locations[1],alpha=0.4,hue=C,s=200)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
plt.xlabel("Posterior - Anterior(mm)",fontsize=30)
plt.ylabel("Dorsal - Ventral(mm)",fontsize=30)
from networkx.algorithms.community.centrality import girvan_newman
from tqdm.notebook import tqdm
import itertools
k = 40
comp = girvan_newman(g)
limited = itertools.takewhile(lambda c: len(c) <= k, comp)
results = []
for communities in tqdm(limited):
results.append(tuple(sorted(c) for c in communities))
for r in results:
print([len(rr) for rr in r])
C_gn = partition2series(results[7])+" community"
plot_connectome_communities(C_gn)
plt.plot(rich_neurons_locations[0],rich_neurons_locations[1],"*k",markersize=7)
C_gn[rich_club]
from community import best_partition
C_l = best_partition(g)
C_l = pd.Series(partition).astype(str)+" community"
plot_connectome_communities(C_l)
plt.plot(rich_neurons_locations[0],rich_neurons_locations[1],"*k",markersize=7)
C_l[rich_club]
confusion_table = pd.crosstab(C_l.sort_index(),C_gn.sort_index())
sns.heatmap(confusion_table,annot=True)
from networkx.algorithms.community.quality import modularity,performance
modularity(g,results[7]), modularity(g,series2partition(C_l))
from networkx.algorithms.community.quality import performance
performance(g,results[7]), performance(g,series2partition(C_l))
def intra_inter_community_degree(G,C):
intra_community_degree = {}
inter_community_degree = {}
for n in g.nodes():
neighbors = list(g.neighbors(n))
intra_community_degree[n] = (C[neighbors] == C[n]).sum()
inter_community_degree[n] = len(neighbors) - intra_community_degree[n]
res = pd.DataFrame({"intra_community_deg": intra_community_degree,
"inter_community_deg":inter_community_degree})
return(res)
comm_degrees = intra_inter_community_degree(g,C_l)
rel_comm_degrees = (comm_degrees.T*1./comm_degrees.sum(axis=1)).T
rel_comm_degrees["type"] = "perif"
rel_comm_degrees.loc[rich_club,"type"] = "rich"
sns.boxplot(data=rel_comm_degrees,x="type",y="intra_community_deg")
plt.ylabel("Intra-Community degree")
sns.boxplot(data=rel_comm_degrees,x="type",y="inter_community_deg")
plt.ylabel("Inter-Community degree")