# Encoding: utf-8 # Estimations du pagerank par simulation, puissance itérée # et implémentation networkx. Exemples et illustrations. # Journée "maths & numérique" # canopé 51, Reims, 29 ajnvier 2020 # Conférence # Les réseaux sociaux : un terrain de jeu # pour mathématiciens et informaticiens # # Frédéric Blanchard │ Philippe Regnault # Centre de recherche en S.T.I.C. │ Laboratoire de mathématiques de Reims # Université de Reims Champagne-Ardenne │ Université de Reims Champagne-Ardenne import random import networkx import numpy def goto_random(M): """ Random choice of one vertex in G (M) """ return random.random(range(len(M))) def get_successors(M, v): """ Get the list of successors of vertex v in G (M) """ successors = [] for j in range(len(M)) : if M[v][j] == 1 : successors.append(j) return successors def follow_random(M, v): """ Random choice of one successor of the vertex v in G (M) """ v_successors = get_successors(M, v) if len(v_successors) == 0 : return None else: return random.choice(v_successors) def random_surfer(M, nbs, alpha = 0.15, return_path = False): """ Random surf on G (M) with nbs steps """ position = random.choice(range(len(M))) path = [position] for _ in range(nbs) : r = random.random() if r >= alpha : position = follow_random(M, position) if position == None : position = random.choice(range(len(M))) else : position = random.choice(range(len(M))) path.append(position) if return_path : return path else : return position def estim_pr_v1(M, nbr=100000, nbs=100, alpha = 0.15): """ Pagerank estimation using finish frequencies of many random surfers """ count = [0] * len(M) for _ in range(nbr) : count[random_surfer(M, nbs, alpha = alpha)] += 1 return {i : count[i] / nbr for i in range(len(count))} def estim_pr_v2(M, nbs=1500, alpha = 0.15): """ Pagerank estimation using visiting frequencies of one random surfer """ n = len(M) path = random_surfer(M, nbs = nbs, alpha = alpha, return_path = True) pr = [] for s in range(len(M)) : pr.append(path.count(s) / nbs) return {i : pr[i] for i in range(len(pr))} def compute_pr(M, alpha = 0.15, epsilon = 1e-12): """ Compute pagerank using matrix of transition """ n = len(M) stab = False T = numpy.array(M, dtype=numpy.float64) for i in range(numpy.shape(T)[0]) : T[i,:] = T[i,:] / sum(T[i,:]) T = (alpha / n) * numpy.ones((n, n)) + (1-alpha) * T P = numpy.copy(T) while not stab : P_new = numpy.matmul(P, T) stab = sum(sum(abs(P-P_new))) < epsilon P = P_new pr = list(P[0,:]) return {i : pr[i] for i in range(len(pr))} # ----------------------------------------------------------------------------- # Script principal # ----------------------------------------------------------------------------- if __name__ == "__main__" : # Matrice de test M = [[0, 1, 0, 0, 0, 0, 0, 0], [1, 0, 1, 1, 0, 0, 1, 0], [1 , 0, 0, 0, 0, 1, 0, 0], [0, 0, 0, 0, 0, 1, 0, 1], [0, 1, 0, 0, 0, 1, 1, 0], [1, 0, 0, 0, 0, 0, 0, 0], [0, 0, 1 , 0, 1, 0, 0, 0], [0, 0, 1 , 1, 0, 0, 0, 0]] # Calcul du pagerank v1 prv1 = estim_pr_v1(M, alpha = 1-0.85) # Calcul du pagerank v2 prv2 = estim_pr_v2(M, alpha = 1-0.85) # Calcul du pagerank nx prnx = networkx.pagerank(networkx.DiGraph(numpy.matrix(M)), alpha = 0.85) # Calcul du pagerank par puissance de la matrice de transition prmt = compute_pr(M) # Affichage des résultats for i in range(len(M)) : print(f"Sommet {i+1}") print(f" - prV1 : {prv1[i]}") print(f" - prV2 : {prv2[i]}") print(f" - prNX : {prnx[i]}") print(f" - prMT : {prmt[i]}")