In [1]:
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from node2vec import Node2Vec
import seaborn as sns
In [2]:
# Toy social network: a John/Jim/Tim/Helen group and a Mary/Maria/Mike/Mel/Maya
# group, joined through Sara (Mary-Sara and Sara-Helen edges).
G = nx.Graph()

# Each name appears exactly once here; graph nodes are unique, so repeating a
# name would be a no-op. Maria, Mike, Mel and Maya are not listed because
# add_edges_from below creates any endpoint that does not exist yet.
G.add_nodes_from(['John', 'Mary', 'Sara', 'Helen', 'Tim', 'Jim'])

G.add_edges_from([
    ('Mary', 'Sara'), ('Sara', 'Helen'), ('Helen', 'Jim'), ('Helen', 'Tim'),
    ('Jim', 'Tim'), ('Jim', 'John'), ('Tim', 'John'),
    ('Mary', 'Maria'), ('Mike', 'Mel'), ('Mel', 'Mary'),
    ('Mike', 'Mary'), ('Maria', 'Mel'), ('Mike', 'Maria'), ('Mel', 'Maya'),
])

# Without an explicit `pos`, nx.draw computes a new randomly initialised spring
# layout on every call; a seeded layout keeps the figure identical across
# Restart & Run All.
layout = nx.spring_layout(G, seed=42)
nx.draw(G, pos=layout, node_color='lightblue', node_size=1000, with_labels=True)
plt.show()
In [3]:
# Random-walk bias parameters:
#   1/p -> unnormalized probability of returning to the source node
#   1/q -> unnormalized probability of moving away from the source node
# Only 2 dimensions are used for this small example, so don't expect great
# results; try different parameter values to see what works.
node2vec = Node2Vec(
    G,
    dimensions=2,
    walk_length=10,
    num_walks=100,
    workers=2,
    p=1,
    q=2,
)
Computing transition probabilities: 100%|██████████| 10/10 [00:00<00:00, 8785.72it/s]
In [4]:
# Fit the embedding model; its `.wv` vectors are what the following cells query.
model = node2vec.fit(
    window=3,
    min_count=1,
)
In [5]:
# Nearest neighbours of 'John' in the embedding space (names only; the
# similarity score that comes with each name is ignored).
john_neighbours = model.wv.most_similar('John')
for name, _similarity in john_neighbours:
    print(name)
Tim
Jim
Helen
Sara
Mary
Maya
Mike
Maria
Mel
In [6]:
# Print the 2-dim embedding of each node.
# `KeyedVectors.vocab` was removed in gensim 4 (accessing it raises
# AttributeError) and `index_to_key` is its replacement; fall back to `vocab`
# so the cell still runs on gensim 3.
wv = model.wv
node_names = list(wv.index_to_key) if hasattr(wv, 'index_to_key') else list(wv.vocab)
for node in node_names:
    print(node, '->', wv[node])
Helen -> [-1.2490832  1.1476479]
Sara -> [-0.24129972  1.15405   ]
Jim -> [-1.9271443  1.2589829]
Tim -> [-1.8020566  1.188781 ]
John -> [-1.7490398  1.1673741]
Mike -> [0.7248171 1.656128 ]
Mel -> [1.0139871 1.9101939]
Maya -> [0.59177995 1.4900198 ]
Maria -> [0.87540966 1.818873  ]
Mary -> [0.6717256 1.7141112]
In [7]:
# Stack the embeddings into one array, one row per node.
# `KeyedVectors.vocab` was removed in gensim 4 (accessing it raises
# AttributeError) and `index_to_key` is its replacement; fall back to `vocab`
# so the cell still runs on gensim 3.
wv = model.wv
node_names = list(wv.index_to_key) if hasattr(wv, 'index_to_key') else list(wv.vocab)
np2d = np.array([wv[name] for name in node_names])
In [8]:
# Scatter plot of the 2-dim node embeddings, each point labelled with its node.
sns.set_style('whitegrid')
figure = plt.figure(figsize=(8, 8))
ax = figure.add_subplot(111)

ax.scatter(np2d[:, 0], np2d[:, 1])

# `KeyedVectors.vocab` was removed in gensim 4 (accessing it raises
# AttributeError) and `index_to_key` is its replacement; fall back to `vocab`
# so the cell still runs on gensim 3.
# NOTE(review): labels are matched to np2d rows by position, so this key order
# must be the one np2d was built with -- confirm if np2d's cell changes.
wv = model.wv
node_names = list(wv.index_to_key) if hasattr(wv, 'index_to_key') else list(wv.vocab)
for i, txt in enumerate(node_names):
    # small offset so the label does not sit on top of its marker
    ax.annotate(txt, (np2d[i, 0] + .02, np2d[i, 1] + .02), size=16)

ax.set(title='node2vec embeddings', xlabel='dimension 1', ylabel='dimension 2')
plt.show()
In [9]:
from sklearn.cluster import KMeans

# Cluster the 2-dim embeddings into two groups.
# random_state pins the centroid initialisation so the cluster ids (and
# therefore the colours below) are the same on every run; n_init is spelled
# out so the result does not depend on the installed scikit-learn's default.
km = KMeans(n_clusters=2, n_init=10, random_state=0)
km.fit(np2d)

# get cluster assignment labels
labels = km.labels_
centers = km.cluster_centers_
colors = ['blue' if x == 0 else 'orange' for x in labels]

figure = plt.figure(figsize=(8, 8))
ax = figure.add_subplot(111)
ax.scatter(np2d[:, 0], np2d[:, 1], c=colors)
ax.scatter(centers[:, 0], centers[:, 1], marker="x", color='r')

# `KeyedVectors.vocab` was removed in gensim 4 (accessing it raises
# AttributeError) and `index_to_key` is its replacement; fall back to `vocab`
# so the cell still runs on gensim 3.
# NOTE(review): labels are matched to np2d rows by position, so this key order
# must be the one np2d was built with -- confirm if np2d's cell changes.
wv = model.wv
node_names = list(wv.index_to_key) if hasattr(wv, 'index_to_key') else list(wv.vocab)
for i, txt in enumerate(node_names):
    # small offset so the label does not sit on top of its marker
    ax.annotate(txt, (np2d[i, 0] + .02, np2d[i, 1] + .02), size=16)

ax.set(title='KMeans clusters of node2vec embeddings',
       xlabel='dimension 1', ylabel='dimension 2')
plt.show()