Language | R |
Description | Animate the process of kmeans cluster. |
The animation graph can also generate by ImageMagick.
draw.kmeans = function(w, k=2, dimension=2, iteration=500,
method="euclidean", initial="random", save=FALSE)
w | Data matrix where rows are samples and columns are variables. |
k | Number of clusters. |
dimension | Whether draw graph in 2D space or 3D space. |
iteration | Maximum iterations of clustering. |
method | The metric to calculate distance of variables. |
initial | how to initialize the centers of clusters. |
save | Whether save iamges. |
l = 1000
w = matrix(rnorm(l*3), l)
draw.kmeans(w, k=5)


draw.kmeans = function(w, k=2, dimension=2, iteration=500,
method="euclidean", initial="random", save=FALSE) {
require(rgl)
if(!(dimension ==2 || dimension == 3)) {
stop("dimension should be 2 or 3\n")
}
if(dimension > dim(w)[2]) {
stop("dimesion should be no smaller than that of data\n")
}
if(dim(w)[2] < 2) {
stop("w must contain at least two dimensions\n")
}
if(dim(w)[2] > dimension) {
pca.res = prcomp(w)
comp = pca.res$x[,1:dimension]
explain = sum(pca.res$sdev[1:dimension]^2)/sum(pca.res$sdev^2)
}
center = matrix(0, nrow=k, ncol=dim(w)[2])
if(initial == "quantile") {
for(i in 1:k) {
center[i, ] = apply(w, 2, function(x) {quantile(x, i/(k+1))})
}
}
if(initial == "random") {
center = w[sample(1:dim(w)[1], k, replace=FALSE), ]
}
if(save) {
dir = paste(sample(letters, 26, replace=FALSE), collapse="")
dir = paste("_", dir, sep="")
dir.create(dir)
setwd(dir)
}
if(save && dimension == 2) {
png(file="0.k-means.png")
}
if(dimension == 2) {
if(dim(w)[2] > 2) {
plot(comp[,1], comp[,2], xlab="First component", ylab="Second component",
sub=paste("initial, explain =", explain), main="k-means cluster animation")
cp = pca.trans(center, pca.res, k = 2)
points(cp[,1], cp[,2], col="red", pch=20)
text(cp[,1], cp[,2], 1:k, cex=1.5)
}
else {
plot(w[,1], w[,2], xlab="x", ylab="y", sub="initial", main="k-means cluster animation")
points(center[,1], center[,2], col="red", pch=20)
text(center[,1], center[,2], 1:k, cex=1.5)
}
}
if(dimension == 3) {
if(dim(w)[2] > 3) {
plot3d(comp[,1], comp[,2], comp[, 3], xlab="First component", ylab="Second component",
zlab="Third component", sub=paste("initial, explain =", explain),
main="k-means cluster animation")
cp = pca.trans(center, pca.res, k = 3)
points3d(cp[,1], cp[,2], cp[,3], col="red", pch=20)
text3d(cp[,1], cp[,2], cp[,3], 1:k, cex=rep(1.5, dim(cp)[1]))
}
else {
plot3d(w[,1], w[,2], w[,3], xlab="x", ylab="y", zlab="z", sub="initial",
main="k-means cluster animation")
points3d(center[,1], center[,2], center[,3], col="red", pch=20)
text3d(center[,1], center[,2], center[,3], 1:k, cex=rep(1.5, dim(center)[1]))
}
}
if(save && dimension == 2) {
dev.off()
}
if(save && dimension == 3) {
snapshot3d("0.k-means.png")
}
# do kmeans
belong.flag = NULL
iter = 1
while(iter <= iteration) {
Sys.sleep(1)
d = matrix(0, nrow=dim(w)[1], ncol=k)
for (c in 1:k) {
d[, c] = apply(w, 1, function(x){distance(x, center[c,], method=method)})
}
belong = matrix(FALSE, nrow=dim(w)[1], ncol=k)
for (b in 1:dim(belong)[1]) {
belong[b, which.min(d[b, ])] = TRUE
}
for(i in 1:k) {
center[i, ] = apply(w, 2, function(x) {sum(x * belong[, i])/sum(belong[, i])})
}
# draw
if(save && dimension == 2) {
png(file=paste(iter, ".k-means.png", sep=""))
}
color = numeric(dim(belong)[1])
for(i in 1:dim(belong)[2]) {
color = color + i*belong[,i]
}
if(dimension == 2) {
if(dim(w)[2] > 2) {
plot(comp[,1], comp[,2], xlab="First component", ylab="Second component",
sub=paste(iter, "initial, explain =", explain), main="k-means cluster animation", col=color)
cp = pca.trans(center, pca.res, k = 2)
points(cp[,1], cp[,2], col="red", pch=20)
text(cp[,1], cp[,2], 1:k, cex=1.5)
for(i in 1:k) {
x = comp[belong[, i], 1]
y = comp[belong[, i], 2]
hull = chull(x, y)
polygon(x[hull], y[hull], border=i)
}
}
else {
plot(w[,1], w[,2], xlab="x", ylab="y", sub=paste(iter, "iterations"),
main="k-means cluster animation", col=color)
points(center[,1], center[,2], col="red", pch=20)
text(center[,1], center[,2], 1:k, cex=1.5)
for(i in 1:k) {
x = w[belong[, i], 1]
y = w[belong[, i], 2]
hull = chull(x, y)
polygon(x[hull], y[hull], border=i)
}
}
}
if(dimension == 3) {
if(dim(w)[2] > 3) {
plot3d(comp[,1], comp[,2], comp[, 3], xlab="First component", ylab="Second component",
zlab="Third component", sub=paste(iter, "initial, explain =", explain),
main="k-means cluster animation", col=color)
cp = pca.trans(center, pca.res, k = 3)
points3d(cp[,1], cp[,2], cp[,3], col="red")
text3d(cp[,1], cp[,2], cp[,3], 1:k, cex=rep(1.5, dim(cp)[1]))
}
else {
plot3d(w[,1], w[,2], w[,3], xlab="x", ylab="y", zlab="z", sub=paste(iter, "iterations"),
main="k-means cluster animation", col=color)
points3d(center[,1], center[,2], center[,3], col="red")
text3d(center[,1], center[,2], center[,3], 1:k, cex=rep(1.5, dim(center)[1]))
}
}
if(save && dimension == 2) {
dev.off()
}
if(save && dimension == 3) {
snapshot3d(paste(iter, ".k-means.png", sep=""))
}
cat(paste(iter, "iterations\n"))
if(identical(belong, belong.flag)) {
break
}
belong.flag = belong
iter = iter + 1
}
if(save) {
pstr = Sys.getenv()["PATH"]
path = unlist(strsplit(pstr, ";"))
image.magick.path = path[grepl("ImageMagick", path)]
convert.path = file.path(image.magick.path, "convert.exe")
command = paste("\"", convert.path, "\"", " -delay 80 *.k-means.png ../output.gif")
#system(command)
#file.remove(list.files(pattern=".png"))
setwd("..")
#file.remove(dir)
cat(paste("file at ", getwd() , "/", dir, "\n", sep=""))
}
rownames(belong) = rownames(w)
colnames(belong) = paste("C", 1:k, sep="")
return(invisible(belong))
}
# find the principle component
pca.trans = function(m, pca.res=prcomp(m), k =2) {
if(k > dim(m)[2]) {
stop("!")
}
w = matrix(0, nrow=dim(m)[1], ncol=k)
for(i in 1:dim(m)[1]) {
for ( j in 1:k) {
w[i,j] = sum((m[i, ] - pca.res$center)*pca.res$rotation[,j])
}
}
return(w)
}
# calculate distance between two vectors
distance = function(x, y, method="euclidean") {
m = matrix(0, nrow=2, ncol=length(x))
m[1, ] = x
m[2, ] = y
if(method == "pearson" || method == "spearman" || method == "kendall") {
d = 1 - cor(x, y, method=method)
}
else {
d = as.vector(dist(m, method=method))
}
return(d)
}