# By: Aziz Altowayan
# http://csis.pace.edu/~ctappert/cs816-15fall/hw/hw05-kmeans.pdf
# 
# Coordinates of the ten sample points, one vector per axis:
# point i is (x[i], y[i]).
x <- c(-6, -3, 1, -4, 0, 11, 8, 13, 8, 12)
y <- c(4, 7, 6, 0, -1, 7, 3, 3, -1, -2)

# Collect the points in a two-column data frame; naming the arguments sets
# the column names to "x" and "y" directly.
X <- data.frame(x = x, y = y)
plot(X, main = "10 data points")

## Question 1
## 
# 1) First run: seed the two centroids at (-3, 7) and (11, 7).
# `x` and `y` hold the seeds' coordinates per axis, so each row of `init`
# is one starting centroid.
# Note: this reuses (overwrites) the `x`/`y` names from the data section;
# the data points themselves live on in `X`.
x <- c(-3, 11)
y <- c(7, 7)
init <- data.frame(x = x, y = y)
cl <- kmeans(x = X, centers = init, algorithm = "Forgy")

# Scatter plot of the data, coloured by assigned cluster.
plot(
  X,
  col = cl[["cluster"]],
  main = "Ex1: clusters and centroids with init points (-3,7), (11,7)"
)
# Final centroids, drawn with plotting symbol 8, one colour per cluster.
points(cl[["centers"]], col = seq_len(2), pch = 8, cex = 2)
# Starting centroids, drawn larger with plotting symbol 7.
points(init, col = seq_len(2), pch = 7, cex = 3)

# Show the fitted clustering (autoprinted at top level).
cl
## K-means clustering with 2 clusters of sizes 5, 5
## 
## Cluster means:
##      x   y
## 1 -2.4 3.2
## 2 10.4 2.0
## 
## Clustering vector:
##  [1] 1 1 1 1 1 2 2 2 2 2
## 
## Within cluster sum of squares by cluster:
## [1] 84.0 73.2
##  (between_SS / total_SS =  72.4 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"    
## [5] "tot.withinss" "betweenss"    "size"         "iter"        
## [9] "ifault"
## Question 2
## 
# 2) Second run: seed the two centroids at (8, 3) and (8, -1).
# As in the first run, `x` and `y` hold the seeds' coordinates per axis
# (not one point each), so every row of `init` is one starting centroid and
# no transpose is needed.
x <- c(8, 8)
y <- c(3, -1)
init <- data.frame(x, y) # starting points
# note: the default algorithm (Hartigan-Wong) yields Left-Right clusters
# for these starting points, hence the explicit "Forgy"
cl <- kmeans(X, init, algorithm = "Forgy")

# plot clusters, coloured by assigned cluster
plot(
  X,
  col = cl$cluster,
  main = "Ex2: clusters and centroids with init points (8,3), (8,-1)"
)
# plot centroids (final cluster means)
points(cl$centers, col = 1:2, pch = 8, cex = 2)
# plot init points (starting centroids)
points(init, col = 1:2, pch = 7, cex = 3)

# show the fitted clustering (autoprinted at top level)
cl
## K-means clustering with 2 clusters of sizes 6, 4
## 
## Cluster means:
##   x  y
## 1 4  5
## 2 4 -1
## 
## Clustering vector:
##  [1] 1 1 1 2 2 1 1 1 2 2
## 
## Within cluster sum of squares by cluster:
## [1] 322 162
##  (between_SS / total_SS =  15.1 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"    
## [5] "tot.withinss" "betweenss"    "size"         "iter"        
## [9] "ifault"
## Question 3
## 
# 3) Third run: seed the two centroids at (0, -1) and (11, 7).
# As in the first run, `x` and `y` hold the seeds' coordinates per axis
# (not one point each), so every row of `init` is one starting centroid and
# no transpose is needed.
x <- c(0, 11)
y <- c(-1, 7)
init <- data.frame(x, y) # starting points
cl <- kmeans(X, init, algorithm = "Forgy")

# plot clusters, coloured by assigned cluster
plot(
  X,
  col = cl$cluster,
  main = "Ex3: clusters and centroids with init points (0,-1), (11,7)"
)
# plot centroids (final cluster means)
points(cl$centers, col = 1:2, pch = 8, cex = 2)
# plot init points (starting centroids)
points(init, col = 1:2, pch = 7, cex = 3)

# show the fitted clustering (autoprinted at top level)
cl
## K-means clustering with 2 clusters of sizes 5, 5
## 
## Cluster means:
##      x   y
## 1 -2.4 3.2
## 2 10.4 2.0
## 
## Clustering vector:
##  [1] 1 1 1 1 1 2 2 2 2 2
## 
## Within cluster sum of squares by cluster:
## [1] 84.0 73.2
##  (between_SS / total_SS =  72.4 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"    
## [5] "tot.withinss" "betweenss"    "size"         "iter"        
## [9] "ifault"
## Question 5
## 
# 5) Stated answer: how the 45 (= 41 + 4) starting pairs split between the
# two clustering outcomes. Presumably these are all choose(10, 2) pairs of
# data points used as starting centroids -- confirm against the assignment.
print(
  "41 pairs yield Left-Right clustering, and 4 pairs yield Up-Down clustering"
)
## [1] "41 pairs yield Left-Right clustering, and 4 pairs yield Up-Down clustering"
## Question 6
## 
# 6) Stated answer: outcome for the 21 starting pairs whose two points are
# at least 10 apart.
print(
  "All of the 21 pairs (of distance >= 10) will yield Left-Right clustering"
)
## [1] "All of the 21 pairs (of distance >= 10) will yield Left-Right clustering"