import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from node2vec import Node2Vec
import seaborn as sns
# Toy undirected friendship graph. 'Mary' is the only person with edges into
# both halves of the edge list below (the Sara/Helen/Jim/Tim/John side and the
# Maria/Mike/Mel/Maya side).
people = ['John', 'Mary', 'Sara', 'Helen', 'Tim', 'Jim']
friendships = [
    ('Mary', 'Sara'),
    ('Sara', 'Helen'),
    ('Helen', 'Jim'),
    ('Helen', 'Tim'),
    ('Jim', 'Tim'),
    ('Jim', 'John'),
    ('Tim', 'John'),
    # Maria, Mike, Mel and Maya are not in `people`; they become nodes
    # when their edges are added.
    ('Mary', 'Maria'),
    ('Mike', 'Mel'),
    ('Mel', 'Mary'),
    ('Mike', 'Mary'),
    ('Maria', 'Mel'),
    ('Mike', 'Maria'),
    ('Mel', 'Maya'),
]

G = nx.Graph()
G.add_nodes_from(people)
G.add_edges_from(friendships)

# Show the graph with node labels before computing any embeddings.
nx.draw(G, with_labels=True, node_size=1000, node_color='lightblue')
plt.show()
# node2vec biases its random walks with two parameters:
#   1/p = unnormalized probability of stepping back to the node just left
#   1/q = unnormalized probability of moving further away from that node
# Only 2 dimensions are used for this small example, so don't expect great
# results; try different parameters to see what works.
node2vec = Node2Vec(
    G,
    dimensions=2,
    walk_length=10,
    num_walks=100,
    p=1,
    q=2,
    workers=2,
)

# Train the embedding model; `model.wv` holds one vector per node name.
model = node2vec.fit(min_count=1, window=3)
# Node names in the order the embedding model stores them.
# gensim 4.0 removed `KeyedVectors.vocab` (accessing it raises AttributeError);
# `index_to_key` is the replacement. Fall back to `vocab` on gensim 3.x.
wv = model.wv
node_names = list(wv.index_to_key) if hasattr(wv, 'index_to_key') else list(wv.vocab)

# Find the nearest neighbours of 'John' in embedding space.
for node, _ in wv.most_similar('John'):
    print(node)

# Print the 2-dim embedding of each node.
for node in node_names:
    print(node, '->', wv[node])

# One row per node, in the same order as `node_names`.
np2d = np.array([wv[name] for name in node_names])

# Scatter plot of the raw embeddings, each point annotated with its node name.
sns.set_style('whitegrid')
figure = plt.figure(figsize=(8, 8))
ax = figure.add_subplot(111)
ax.scatter(np2d[:, 0], np2d[:, 1])
for i, txt in enumerate(node_names):
    # Small offset so the label does not sit on top of its marker.
    ax.annotate(txt, (np2d[i, 0] + .02, np2d[i, 1] + .02), size=16)
from sklearn.cluster import KMeans

# Split the 2-dim node embeddings into two clusters.
km = KMeans(n_clusters=2)
km.fit(np2d)

# Cluster assignment per embedding row, and the two cluster centres.
labels = km.labels_
centers = np.array(km.cluster_centers_)
colors = ['blue' if label == 0 else 'orange' for label in labels]

# Node names used to annotate the points. gensim 4.0 removed
# `KeyedVectors.vocab`, so prefer `index_to_key` and fall back to `vocab`
# on gensim 3.x.
# NOTE: assumes the rows of np2d follow the model's vocabulary order, which
# is how np2d is built earlier in this script.
point_names = (
    list(model.wv.index_to_key)
    if hasattr(model.wv, 'index_to_key')
    else list(model.wv.vocab)
)

# Same scatter plot as before, coloured by cluster, with centres marked 'x'.
figure = plt.figure(figsize=(8, 8))
ax = figure.add_subplot(111)
ax.scatter(np2d[:, 0], np2d[:, 1], c=colors)
ax.scatter(centers[:, 0], centers[:, 1], marker="x", color='r')
for i, txt in enumerate(point_names):
    # Small offset so the label does not sit on top of its marker.
    ax.annotate(txt, (np2d[i, 0] + .02, np2d[i, 1] + .02), size=16)
plt.show()