import random
from array import *
import matplotlib.pyplot as plt
import numpy as np
from scipy.spatial import distance
from ipywidgets import IntSlider
from ipywidgets.embed import embed_minimal_html
# Create an integer slider widget whose initial value is 40.
slider = IntSlider(value=40)
# Export the slider to the file 'export.html' with the page title 'Widgets export'.
embed_minimal_html('export.html', views=[slider], title='Widgets export')
def initializeDataSet():
    """Append 20 random integer points to the module-level ``dataSet``.

    Each point is an ``(x, y)`` tuple whose coordinates are drawn
    independently with ``random.randint(1, 100)``. Entries already in
    ``dataSet`` are kept.
    """
    dataSet.extend(
        (random.randint(1, 100), random.randint(1, 100)) for _ in range(20)
    )
def convertDataSet():
    """Split ``dataSet`` into the parallel ``xValues`` / ``yValues`` lists.

    Coordinates are appended in ``dataSet`` order. The target lists are not
    cleared here, so callers that rebuild the data reset them first.
    """
    for point in dataSet:
        xValues.append(point[0])
        yValues.append(point[1])
# Module-level state shared by the functions in this script.
dataSet = []  # (x, y) tuples
xValues = []  # x coordinate of each point, in dataSet order
yValues = []  # y coordinate of each point, in dataSet order

# Generate the random points, split them per axis, and draw them as black
# circle markers.
initializeDataSet()
convertDataSet()
plt.plot(xValues, yValues, 'ko')
def findMaxOfX(dataSet):
    """Return the largest x coordinate (element 0) among the points.

    Raises:
        ValueError: if ``dataSet`` is empty.
    """
    return max(point[0] for point in dataSet)
def findMaxOfY(dataSet):
    """Return the largest y coordinate (element 1) among the points.

    Raises:
        ValueError: if ``dataSet`` is empty.
    """
    return max(point[1] for point in dataSet)
def plotCenterPoints():
    """Plot every entry of the module-level ``centroids`` as a diamond marker.

    The marker styles are reused cyclically, so any number of centroids can
    be drawn; with four or fewer centroids each one gets its own colour.
    """
    centerPointColor = ['bD', 'rD', 'gD', 'yD']
    for i, centroid in enumerate(centroids):
        # Wrap around the style list instead of indexing past its end, which
        # raised IndexError as soon as there were more than four centroids.
        style = centerPointColor[i % len(centerPointColor)]
        plt.plot(centroid[0], centroid[1], style)
def createRandomCentroids(dataSet, k):
    """Replace the contents of ``centroids`` with ``k`` random integer points.

    Args:
        dataSet: sequence of ``(x, y)`` points; its largest x and y bound the
            random coordinates.
        k: number of centroids to create.

    Each centroid is ``(x, y)`` with ``x`` drawn by
    ``random.randint(1, max x)`` and ``y`` by ``random.randint(1, max y)``.
    The module-level ``centroids`` list is cleared and refilled in place.
    """
    centroids.clear()
    if k <= 0:
        # Nothing to generate; skip the maxima so an empty dataSet is still
        # accepted in this case, as before.
        return
    # The maxima do not change while centroids are generated, so scan the
    # data once here rather than twice for every centroid.
    maxX = findMaxOfX(dataSet)
    maxY = findMaxOfY(dataSet)
    centroids.extend(
        (random.randint(1, maxX), random.randint(1, maxY)) for _ in range(k)
    )
# Shared list of the current centroid positions as (x, y) tuples.
centroids = []

# Seed two random centroids and draw them together with the raw data points.
createRandomCentroids(dataSet, 2)
plotCenterPoints()
plt.plot(xValues, yValues, 'ko')
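# Pseudocode for the assignment step implemented below:
# For each point in dataSet:
#     For each centroid:
#         compute the distance between the point and that centroid and
#         append (centroid index, distance) to listOfDistances
#     find the entry with the minimum distance and append its centroid index
#     to nearestCentroid
#     clear listOfDistances before moving on to the next point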
def getDistance(firstCoord, secondCoord):
    """Return the Euclidean distance between two coordinates.

    Thin wrapper around ``scipy.spatial.distance.euclidean``; used by
    createListOfShortestDistances().
    """
    euclideanDistance = distance.euclidean(firstCoord, secondCoord)
    return euclideanDistance
def getMinimumDistance():
    """Record the closest centroid for the point currently being processed.

    ``listOfDistances`` holds ``(centroid index, distance)`` pairs. The index
    from the pair with the smallest distance is appended to
    ``nearestCentroid``. Used by createListOfShortestDistances().
    """
    closest = min(listOfDistances, key=lambda entry: entry[1])
    # Element 0 of the pair is the centroid's position in ``centroids``.
    nearestCentroid.append(closest[0])
def createListOfShortestDistances():
    """Rebuild ``nearestCentroid`` for the current data and centroids.

    For every point in ``dataSet`` the distance to each centroid is measured
    and the index of the closest centroid is appended to ``nearestCentroid``,
    so the result lines up with ``dataSet`` by position.
    """
    nearestCentroid.clear()
    for point in dataSet:
        listOfDistances.extend(
            (centroidIndex, getDistance(point, centroid))
            for centroidIndex, centroid in enumerate(centroids)
        )
        getMinimumDistance()
        # The scratch list must be empty before the next point is measured.
        listOfDistances.clear()
# Scratch list of (centroid index, distance) pairs for the point currently
# being processed by createListOfShortestDistances().
listOfDistances = []
# For each point in dataSet (same order): index of its nearest centroid.
nearestCentroid = []
# Assign every point to its closest centroid and show the resulting indices.
createListOfShortestDistances()
print(nearestCentroid)
def createClusterIndex():
    """Reset the per-cluster coordinate lists to one list per centroid.

    Each inner list starts with the cluster's own index as a placeholder
    first element; plotClusters() and getAverage() skip element 0 when
    reading the lists. Called by createClusters().
    """
    clusterCount = len(centroids)
    # Slice assignment replaces the contents in place, so the module-level
    # list objects stay the same.
    cluster_xValues[:] = [[clusterIndex] for clusterIndex in range(clusterCount)]
    cluster_yValues[:] = [[clusterIndex] for clusterIndex in range(clusterCount)]
def createClusters():
    """Distribute the data points into the per-cluster coordinate lists.

    Point ``p`` (position in ``xValues`` / ``yValues``) is appended to the
    lists of the cluster named by ``nearestCentroid[p]``.
    """
    createClusterIndex()
    for pointIndex, clusterIndex in enumerate(nearestCentroid):
        cluster_xValues[clusterIndex].append(xValues[pointIndex])
        cluster_yValues[clusterIndex].append(yValues[pointIndex])
# Plot format strings (colour letter + 'o' circle marker), one per cluster
# index; used by plotClusters().
colors = ['bo', 'ro', 'go', 'yo', 'co', 'mo', 'ko']
# cluster_xValues[c] / cluster_yValues[c] hold the coordinates assigned to
# cluster c; element 0 of each inner list is a placeholder holding c itself.
cluster_xValues = []
cluster_yValues = []
# Group the points by their nearest centroid.
createClusters()
def plotClusters():
    """Plot every clustered point using the format string of its cluster.

    Element 0 of each inner list is the placeholder written by
    createClusterIndex(), so plotting starts at position 1.
    """
    for clusterIndex, xs in enumerate(cluster_xValues):
        for position in range(1, len(xs)):
            plt.plot(
                xs[position],
                cluster_yValues[clusterIndex][position],
                colors[clusterIndex],
            )
# Draw the clustered points, then the centroids.
plotClusters()
plotCenterPoints()
def getAverage(list, index):
    """Return the mean of ``list[index]`` ignoring its first element.

    Args:
        list: sequence of sequences (the name shadows the builtin inside
            this function only).
        index: which inner sequence to average.

    Returns:
        The arithmetic mean of every element after position 0, or 0 when
        there are no such elements.
    """
    members = list[index][1:]
    if not members:
        return 0
    return sum(members) / len(members)
def shiftCentroids():
    """Move every centroid to the mean of its cluster and report convergence.

    Reads ``cluster_xValues`` / ``cluster_yValues`` and updates ``xAverage``,
    ``yAverage`` and ``centroids`` in place.

    Returns:
        False on the first call after ``xAverage`` was emptied (there are no
        previous means to compare with) or when any mean changed; True when
        previous means existed and every one of them is unchanged.
    """
    firstPass = len(xAverage) == 0
    moved = False
    for i in range(len(centroids)):
        # NOTE: getAverage() yields 0 for a cluster with no points, so the
        # centroid of an empty cluster is placed at (0, 0).
        newX = getAverage(cluster_xValues, i)
        newY = getAverage(cluster_yValues, i)
        if firstPass:
            xAverage.append(newX)
            yAverage.append(newY)
        else:
            # Compare each coordinate on its own. Summing the signed
            # differences lets opposite movements cancel to 0 and report
            # convergence while centroids are still moving.
            if xAverage[i] != newX or yAverage[i] != newY:
                moved = True
            xAverage[i] = newX
            yAverage[i] = newY
        centroids[i] = (newX, newY)
    return not firstPass and not moved
def plotNewCentroids():
    """Plot the stored cluster means, one format string per centroid.

    Reads ``xAverage`` / ``yAverage`` (filled by shiftCentroids()) and the
    styles in ``centerPointColors``.
    """
    for centroidIndex, _ in enumerate(centroids):
        plt.plot(
            xAverage[centroidIndex],
            yAverage[centroidIndex],
            centerPointColors[centroidIndex],
        )
# Most recent mean x / mean y of each cluster, indexed like centroids.
xAverage = []
yAverage = []
# Plot format strings (colour letter + 'd' marker) for the recomputed
# centroids; used by plotNewCentroids().
centerPointColors = ['bd', 'rd', 'gd', 'yd', 'cd', 'md', 'kd']
# Move the centroids to their cluster means once, then draw them and the
# clustered points.
shiftCentroids()
plotNewCentroids()
plotClusters()
def checkToSeeIfDataChangesClusters():
    """Reassign every point to its nearest centroid and rebuild the clusters.

    Run after the centroids have moved so that ``nearestCentroid`` and the
    per-cluster coordinate lists reflect the new positions.
    """
    del nearestCentroid[:]
    createListOfShortestDistances()
    createClusters()
# Redo the assignment step against the shifted centroids and redraw.
checkToSeeIfDataChangesClusters()
plotClusters()
plotNewCentroids()
# Shift the centroids a second time, redraw, and print their coordinates.
shiftCentroids()
plotClusters()
plotNewCentroids()
print(centroids)
def kMeans(dataSet, k, maxCycles=100):
    """Run k-means from fresh random centroids and return the cycle counter.

    Args:
        dataSet: points used to bound the random starting centroids.
            NOTE: the assignment and clustering helpers read the
            module-level ``dataSet`` / ``xValues`` / ``yValues`` rather than
            this argument, so it should be the data those globals hold.
        k: number of centroids.
        maxCycles: the loop stops once the cycle counter reaches this value
            (default 100, the previously hard-coded cap).

    Returns:
        The cycle counter: it starts at 1 and is incremented after every
        centroid shift, including the shift that reports convergence.
    """
    xAverage.clear()
    yAverage.clear()
    createRandomCentroids(dataSet, k)
    createListOfShortestDistances()
    createClusters()

    trainingCycles = 1
    convergence = False
    while not convergence and trainingCycles < maxCycles:
        convergence = shiftCentroids()
        # Reassign points so the clusters match the centroids just computed.
        checkToSeeIfDataChangesClusters()
        trainingCycles += 1
    return trainingCycles
def getTrainingCycles(nuDataSet, k):
    """Run kMeans once, print its cycle count, and plot the result.

    Args:
        nuDataSet: data set forwarded to kMeans().
        k: number of centroids forwarded to kMeans().
    """
    cycles = kMeans(nuDataSet, k)
    print("Total cycles until convergence: " + str(cycles))
    plotClusters()
    plotNewCentroids()
# Cluster the current data with k = 2 through 7.
for k in range(2, 8):
    getTrainingCycles(dataSet, k)

# Build a new 20-point data set: 10 points with both coordinates in 1..25,
# 9 points with both coordinates in 75..100, and the single point (50, 50).
dataSet = [(random.randint(1, 25), random.randint(1, 25)) for _ in range(10)]
dataSet.extend(
    (random.randint(75, 100), random.randint(75, 100)) for _ in range(9)
)
dataSet.append((50, 50))
xValues = []
yValues = []
convertDataSet()
print(dataSet)

# Cluster the new data with k = 2 and k = 4, then print the final centroids
# and the stored cluster means.
getTrainingCycles(dataSet, 2)
getTrainingCycles(dataSet, 4)
print(centroids)
for centroidIndex in range(len(centroids)):
    print(xAverage[centroidIndex], yAverage[centroidIndex])
def monteCarlo(nuDataSet, k):
    """Run kMeans 1000 times and print the mean cycle count.

    Args:
        nuDataSet: data set forwarded to kMeans() on every run.
        k: number of centroids forwarded to kMeans().
    """
    runs = 1000
    totalCycles = sum(kMeans(nuDataSet, k) for _ in range(runs))
    meanCycles = totalCycles / runs
    print("Average Convergence Rate: " + str(meanCycles) + " training cycles")
# Average the cycle count over repeated runs on the current data with k = 4.
monteCarlo(dataSet, 4)
def createClusteredDataSet(clusterSize, spread):
    """Refill the shared data lists with two square groups of random points.

    Args:
        clusterSize: number of points generated for each group.
        spread: side length of each group's square in coordinate units.

    The first group has both coordinates drawn from ``1..spread`` and the
    second from ``75..75 + spread``. ``dataSet``, ``xValues`` and ``yValues``
    are emptied in place before the new points are added.
    """
    for sharedList in (dataSet, xValues, yValues):
        sharedList.clear()
    for low, high in ((1, spread), (75, 75 + spread)):
        dataSet.extend(
            (random.randint(low, high), random.randint(low, high))
            for _ in range(clusterSize)
        )
    convertDataSet()
# Repeat the same experiment for several (points per group, spread) settings:
# first the spread grows at a fixed group size, then the group size grows at
# a fixed spread.
for clusterSize, spread in (
    (10, 15),
    (10, 30),
    (10, 60),
    (10, 120),
    (20, 60),
    (40, 60),
    (80, 60),
):
    createClusteredDataSet(clusterSize, spread)
    plt.plot(xValues, yValues, 'ko')
    monteCarlo(dataSet, 2)
    plotClusters()
    plotNewCentroids()