LPA算法在Python2与Python3中的差异

hxy    2018-11-21 08:51

本文依赖Networkx库,我的Network库版本是2.0,如需安装请输入:

pip install networkx==2.0

会自动卸载已有版本,安装2.0版本。

LPA算法2.7版本代码如下:

import collections
import random
import networkx as nx

'''
paper : <<Near linear time algorithm to detect community structures in large-scale networks>>
'''

class LPA():
    
    def __init__(self, G, max_iter = 20):
        self._G = G
        self._n = len(G.node) #number of nodes
        self._max_iter = max_iter
        
    def can_stop(self):
        # all node has the label same with its most neighbor
        for i in range(self._n):
            node = self._G.node[i]
            label = node["label"]
            max_labels = self.get_max_neighbor_label(i)
            if(label not in max_labels):
                return False
        return True
        
    def get_max_neighbor_label(self,node_index):
        m = collections.defaultdict(int)
        for neighbor_index in self._G.neighbors(node_index):
            neighbor_label = self._G.node[neighbor_index]["label"]
            m[neighbor_label] += 1
        max_v = max(m.itervalues())
        return [item[0] for item in m.items() if item[1] == max_v]

    
    '''asynchronous update'''
    def populate_label(self):
        #random visit
        visitSequence = random.sample(self._G.nodes(),len(self._G.nodes()))
        for i in visitSequence:
            node = self._G.node[i]
            label = node["label"]
            max_labels = self.get_max_neighbor_label(i)
            if(label not in max_labels):
                newLabel = random.choice(max_labels)
                node["label"] = newLabel
        
    def get_communities(self):
        communities = collections.defaultdict(lambda:list())
        for node in self._G.nodes(True):
            label = node[1]["label"]
            communities[label].append(node[0])
        return communities.values()

    def execute(self):
        #initial label
        for i in range(self._n):
            self._G.node[i]["label"] = i
        iter_time = 0
        #populate label
        while(not self.can_stop() and iter_time<self._max_iter):
            self.populate_label()
            iter_time += 1
        return self.get_communities()
    
    
if __name__ == '__main__':
    G = nx.karate_club_graph()
    algorithm = LPA(G)
    communities = algorithm.execute()
    for community in communities:
        print community

LPA算法Python3版本如下:

# -*- coding: utf-8 -*-
'''
Created on 2017-1-4
@author: Xinyu Huang
'''
import collections
from datetime import datetime
import random

import networkx as nx


class LPA():

    def __init__(self, G, max_iter = 1000):
        self._G = G
        self._n = len(G.nodes) #number of nodes
        print(len(G.nodes), G.nodes(data=True))
        self._max_iter = max_iter

    def can_stop(self):
        # all node has the label same with its most neighbor
#         for i in range(self._n):
#             node = self._G.nodes(data=True)[i]
#             label = node[1]["label"]
#             # 这里改为node
        for (n,d) in self._G.nodes(data=True):
            label = d['label']
            max_labels = self.get_max_neighbor_label(n)
            if(label not in max_labels):
                return False
        return True

    def get_max_neighbor_label(self,node):
        m = collections.defaultdict(int)
        #print 'node',node
        #print self._G.node[node]
        for neighbor_index in self._G.neighbors(node):
            neighbor_label = self._G.node[neighbor_index]["label"]
            m[neighbor_label] += 1
        print('m.items()',m.values())
        max_v = max(m.values())
        #print [item[0] for item in m.items() if item[1] == max_v]
        return [item[0] for item in m.items() if item[1] == max_v]


    '''asynchronous update'''
    def populate_label(self):
        #random visit
        visitSequence = random.sample(self._G.nodes(),len(self._G.nodes()))
        #print('visitSequence',visitSequence)
        for i in visitSequence:
            node = self._G.node[i]
            label = node["label"]
            max_labels = self.get_max_neighbor_label(i)
            print(max_labels)
            if(label not in max_labels):
                newLabel = random.choice(max_labels)
                node["label"] = newLabel

    def get_communities(self):
        communities = collections.defaultdict(lambda:list())
        for node in self._G.nodes(True):
            label = node[1]["label"]
            communities[label].append(node[0])
        return communities.values()

    def execute(self):
        #initial label
        print(self._G.nodes)
        for i in self._G.nodes():
            print(self._G.nodes[i])
            self._G.nodes[i]["label"] = i
        iter_time = 0
        #populate label
        while(not self.can_stop() and iter_time<self._max_iter):
            self.populate_label()
            iter_time += 1
        return self.get_communities()


def read_txt2G(filepath, line):
    G=nx.Graph()
    datafile = open(filepath)
    lines = datafile.readlines(line)

    for line in lines:
        nodes=line.split(' ')
        u = int(nodes[0])
        v = int(nodes[1].strip('\n'))
        #print(u,v)
        G.add_edge(u,v)
    return G

def read_txt2B(filepath, line):
    B=nx.Graph()
    datafile = open(filepath)
    lines = datafile.readlines(line)

    for line in lines:
        nodes=line.split(' ')
        u = nodes[0]
        v = nodes[1].strip('\n')
        #print(u,v)
        node_a = 'A'+u
        node_b = 'B'+v
        B.add_node(node_a,type ='A')
        B.add_node(node_b,type ='B')
        B.add_edge(node_a,node_b)
    #print B.nodes(data=True)
    return B


def run_LPA(G):
    starttime= datetime.now()
    communities = LPA(G).execute()
    endtime =  datetime.now()
    return communities
    # print u'LAP算法执行 %s时间:' %name, (endtime - starttime).microseconds
    # print Q1(communities,G)
    #print communities
    

# if __name__ == '__main__':
#     starttime= datetime.now()
#     G = nx.karate_club_graph()
#     #B=read_txt2B('../data/u1.txt', 5000)
# #     print G.nodes(data=True)
#     #S=B2S(B, 'B')
#     #@print S.nodes(data=True)
# #     show_graph(S)
#     communities = LPA(G).execute()
#     print Q1(communities,G)
#     for community in communities:
#         print community
#     endtime =  datetime.now()
#     print u'算法执行时间:', (endtime - starttime).microseconds

不同版本之间可能存在的兼容性问题是:

1. dict.has_key()问题:在Python2中,判断字典中是否含有某个key,可以使用has_key()方法。但是,在Python3中,不支持此方法,在Python3中,支持in用法,也就是说可以直接使用 ‘key’ in dict 即可。

2. Python3下错误AttributeError: ‘dict’ object has no attribute’iteritems‘的分析与解决:

for k, v in dict1.iteritems(): 
    print(k+ "=>" + v)

在python3中,iteritems() 直接使用 values()

 

 

 

Last Modified: 2020-02-19 12:37
Views: 2.7K

[[total]] comments

Post your comment
  1. [[item.time]]
    [[item.user.username]] [[item.floor]]Floor
  2. Click to load more...
  3. Post your comment