本文依赖Networkx库,我的Network库版本是2.0,如需安装请输入:
pip install networkx==2.0
会自动卸载已有版本,安装2.0版本。
LPA算法2.7版本代码如下:
import collections
import random
import networkx as nx
'''
paper : <<Near linear time algorithm to detect community structures in large-scale networks>>
'''
class LPA():
def __init__(self, G, max_iter = 20):
self._G = G
self._n = len(G.node) #number of nodes
self._max_iter = max_iter
def can_stop(self):
# all node has the label same with its most neighbor
for i in range(self._n):
node = self._G.node[i]
label = node["label"]
max_labels = self.get_max_neighbor_label(i)
if(label not in max_labels):
return False
return True
def get_max_neighbor_label(self,node_index):
m = collections.defaultdict(int)
for neighbor_index in self._G.neighbors(node_index):
neighbor_label = self._G.node[neighbor_index]["label"]
m[neighbor_label] += 1
max_v = max(m.itervalues())
return [item[0] for item in m.items() if item[1] == max_v]
'''asynchronous update'''
def populate_label(self):
#random visit
visitSequence = random.sample(self._G.nodes(),len(self._G.nodes()))
for i in visitSequence:
node = self._G.node[i]
label = node["label"]
max_labels = self.get_max_neighbor_label(i)
if(label not in max_labels):
newLabel = random.choice(max_labels)
node["label"] = newLabel
def get_communities(self):
communities = collections.defaultdict(lambda:list())
for node in self._G.nodes(True):
label = node[1]["label"]
communities[label].append(node[0])
return communities.values()
def execute(self):
#initial label
for i in range(self._n):
self._G.node[i]["label"] = i
iter_time = 0
#populate label
while(not self.can_stop() and iter_time<self._max_iter):
self.populate_label()
iter_time += 1
return self.get_communities()
if __name__ == '__main__':
G = nx.karate_club_graph()
algorithm = LPA(G)
communities = algorithm.execute()
for community in communities:
print community
LPA算法Python3版本如下:
# -*- coding: utf-8 -*-
'''
Created on 2017-1-4
@author: Xinyu Huang
'''
import collections
from datetime import datetime
import random
import networkx as nx
class LPA():
def __init__(self, G, max_iter = 1000):
self._G = G
self._n = len(G.nodes) #number of nodes
print(len(G.nodes), G.nodes(data=True))
self._max_iter = max_iter
def can_stop(self):
# all node has the label same with its most neighbor
# for i in range(self._n):
# node = self._G.nodes(data=True)[i]
# label = node[1]["label"]
# # 这里改为node
for (n,d) in self._G.nodes(data=True):
label = d['label']
max_labels = self.get_max_neighbor_label(n)
if(label not in max_labels):
return False
return True
def get_max_neighbor_label(self,node):
m = collections.defaultdict(int)
#print 'node',node
#print self._G.node[node]
for neighbor_index in self._G.neighbors(node):
neighbor_label = self._G.node[neighbor_index]["label"]
m[neighbor_label] += 1
print('m.items()',m.values())
max_v = max(m.values())
#print [item[0] for item in m.items() if item[1] == max_v]
return [item[0] for item in m.items() if item[1] == max_v]
'''asynchronous update'''
def populate_label(self):
#random visit
visitSequence = random.sample(self._G.nodes(),len(self._G.nodes()))
#print('visitSequence',visitSequence)
for i in visitSequence:
node = self._G.node[i]
label = node["label"]
max_labels = self.get_max_neighbor_label(i)
print(max_labels)
if(label not in max_labels):
newLabel = random.choice(max_labels)
node["label"] = newLabel
def get_communities(self):
communities = collections.defaultdict(lambda:list())
for node in self._G.nodes(True):
label = node[1]["label"]
communities[label].append(node[0])
return communities.values()
def execute(self):
#initial label
print(self._G.nodes)
for i in self._G.nodes():
print(self._G.nodes[i])
self._G.nodes[i]["label"] = i
iter_time = 0
#populate label
while(not self.can_stop() and iter_time<self._max_iter):
self.populate_label()
iter_time += 1
return self.get_communities()
def read_txt2G(filepath, line):
G=nx.Graph()
datafile = open(filepath)
lines = datafile.readlines(line)
for line in lines:
nodes=line.split(' ')
u = int(nodes[0])
v = int(nodes[1].strip('\n'))
#print(u,v)
G.add_edge(u,v)
return G
def read_txt2B(filepath, line):
B=nx.Graph()
datafile = open(filepath)
lines = datafile.readlines(line)
for line in lines:
nodes=line.split(' ')
u = nodes[0]
v = nodes[1].strip('\n')
#print(u,v)
node_a = 'A'+u
node_b = 'B'+v
B.add_node(node_a,type ='A')
B.add_node(node_b,type ='B')
B.add_edge(node_a,node_b)
#print B.nodes(data=True)
return B
def run_LPA(G):
starttime= datetime.now()
communities = LPA(G).execute()
endtime = datetime.now()
return communities
# print u'LAP算法执行 %s时间:' %name, (endtime - starttime).microseconds
# print Q1(communities,G)
#print communities
# if __name__ == '__main__':
# starttime= datetime.now()
# G = nx.karate_club_graph()
# #B=read_txt2B('../data/u1.txt', 5000)
# # print G.nodes(data=True)
# #S=B2S(B, 'B')
# #@print S.nodes(data=True)
# # show_graph(S)
# communities = LPA(G).execute()
# print Q1(communities,G)
# for community in communities:
# print community
# endtime = datetime.now()
# print u'算法执行时间:', (endtime - starttime).microseconds
不同版本之间可能存在的兼容性问题是:
1. dict.has_key()问题:在Python2中,判断字典中是否含有某个key,可以使用has_key()方法。但是,在Python3中,不支持此方法,在Python3中,支持in用法,也就是说可以直接使用 ‘key’ in dict 即可。
2. Python3下错误AttributeError: ‘dict’ object has no attribute’iteritems‘的分析与解决:
for k, v in dict1.iteritems():
print(k+ "=>" + v)
在python3中,iteritems() 直接使用 values()