## source("cluster.R")
##
## Cluster-analysis walkthrough: hierarchical clustering and k-means on the
## bank data from Rfwdmv, then on the car marks data read from
## "CarMarks.csv" in the working directory.
## sample() and kmeans() draw random numbers and no seed is set here, so the
## subsamples and k-means partitions differ from run to run.

## bank data ################################################

library(Rfwdmv)
data(bank.dat)

## Rows 1-100 are labelled "genuine", rows 101-200 "forged".
## The label is built as a factor on purpose: since R 4.0.0 data.frame()
## keeps strings as character, and as.numeric() on a character label yields
## NA, which would leave the "True Groups" plots empty. As a factor the
## codes are forged = 1, genuine = 2 (alphabetical level order).
x <- data.frame(
  bank.dat,
  factor(rep(c("genuine", "forged"), each = 100))
)
names(x) <- c(
  "length", "left.height", "right.height",
  "lower.frame", "upper.frame", "diagonal", "group"
)

## Hierarchical Cluster Analysis ############################

## Euclidean distances on the six measurements.
## Alternative: dist(x[, 1:6], method = "minkowski", p = 1)
d1 <- dist(x[, 1:6])
h1 <- hclust(d1)
plot(h1)
m1 <- cutree(h1, k = 4)

## Compare the known labels with the four clusters cut from the tree,
## shown on columns 5 and 6 (upper.frame, diagonal).
par(mfrow = c(1, 2))
j <- 5:6
plot(x[, j], col = as.numeric(x$group), main = "True Groups")
plot(x[, j], col = m1, main = "Found Clusters")
par(mfrow = c(1, 1))

## Compare Distance Measures ################################

d1 <- dist(x[, 1:6])
d2 <- dist(x[, 1:6], method = "minkowski", p = 3)
h1 <- hclust(d1)
h2 <- hclust(d2)

par(mfrow = c(1, 2))
plot(h1, main = "Euclidean Distance")
plot(h2, main = "Minkowski Distance")
par(mfrow = c(1, 1))

## Smaller Data Set #########################################

## Random subsample of 20 rows so the dendrogram labels stay readable.
xx <- x[sample(seq_len(nrow(x)), 20), ]
d1 <- dist(xx[, 1:6])
d2 <- dist(xx[, 1:6], method = "minkowski", p = 1)
h1 <- hclust(d1)
h2 <- hclust(d2)

par(mfrow = c(1, 2))
plot(h1, main = "Euclidean Distance")
plot(h2, main = "Minkowski Distance")
par(mfrow = c(1, 1))

## Compare Hierarchical Methods #############################

## Fresh subsample of 20 rows; both linkages use the same Euclidean
## distances, so any difference between the trees is due to the linkage.
xx <- x[sample(seq_len(nrow(x)), 20), ]
d1 <- dist(xx[, 1:6])
d2 <- d1
h1 <- hclust(d1, method = "complete")
h2 <- hclust(d2, method = "single")

par(mfrow = c(1, 3))
plot(h1, main = "Complete Linkage")
plot(h2, main = "Single Linkage")
## Third panel: the sampled rows drawn as their row names.
## type = "n" sets up the axes without drawing point symbols.
j <- 5:6
plot(xx[, j], type = "n", main = "Data")
text(xx[, j], rownames(xx))
par(mfrow = c(1, 1))

## K-Means Clustering #######################################

k1 <- kmeans(x[, 1:6], centers = 3)

par(mfrow = c(1, 2))
j <- 5:6
plot(x[, j], col = as.numeric(x$group), main = "True Groups")
plot(x[, j], col = k1$cluster, main = "Found Clusters")
par(mfrow = c(1, 1))

#############################################################
## car marks data ###########################################

## Semicolon-separated file with decimal commas; the first column is moved
## into the row names and then dropped from the data.
x <- read.csv("CarMarks.csv", sep = ";", dec = ",")
rownames(x) <- x[, 1]
x <- x[, -1]

## Hierarchical Cluster Analysis ############################

## NOTE(review): only columns 1:6 are used, the same selection as for the
## bank data -- confirm this is the intended set of car variables.
## Alternative: dist(x[, 1:6], method = "minkowski", p = 1)
d1 <- dist(x[, 1:6])
h1 <- hclust(d1)
plot(h1)

## K-Means Clustering #######################################

## Write each observation's label at its first two principal-component
## scores, coloured by the cluster memberships given in `col`.
##   scores: matrix of component scores (columns 1 and 2 are used)
##   labels: one text label per row of `scores`
##   col:    one colour (cluster number) per row of `scores`
##   main:   plot title
plot_pc_labels <- function(scores, labels, col, main) {
  plot(scores[, 1], scores[, 2],
    type = "n",
    xlab = "PC1", ylab = "PC2", main = main
  )
  text(scores[, 1], scores[, 2], labels, col = col)
}

k <- 4
## Principal components of the covariance matrix (cor = FALSE).
auto.pc <- princomp(x[, 1:6], cor = FALSE)
m1 <- cutree(h1, k)
k1 <- kmeans(x[, 1:6], k)

plot_pc_labels(auto.pc$scores, rownames(x), k1$cluster, "First 2 PCs")

## Side by side: hierarchical clusters versus k-means clusters.
par(mfrow = c(1, 2))
plot_pc_labels(
  auto.pc$scores, rownames(x), m1,
  "First 2 PCs, Hierarchical"
)
plot_pc_labels(
  auto.pc$scores, rownames(x), k1$cluster,
  "First 2 PCs, K-Means"
)
par(mfrow = c(1, 1))