## source("cluster.R")

## bank data ################################################

library(Rfwdmv)
data(bank.dat)

## Build the working data frame: the measurement columns of bank.dat plus a
## `group` label column ("genuine" for rows 1-100, "forged" for rows 101-200).
## NOTE(review): assumes bank.dat has 200 rows in exactly that order - confirm.
x <- bank.dat

## `group` is created as a factor explicitly. The plots further down colour
## points via as.numeric(x$group); on a plain character column that returns
## NA (with a warning) and nothing is drawn. Since R 4.0.0 data.frame() no
## longer turns strings into factors by itself, so the conversion is done
## here. Levels are sorted alphabetically: forged = 1, genuine = 2.
x <- data.frame(
  x,
  group = factor(rep(c("genuine", "forged"), each = 100))
)
names(x) <- c(
  "length", "left.height", "right.height",
  "lower.frame", "upper.frame", "diagonal", "group"
)

## Hierarchical Cluster Analysis ############################

## Cluster the notes on the six measurement columns (Euclidean distance,
## hclust() default linkage) and draw the dendrogram.
## Alternative metric to try: dist(x[, 1:6], method = "minkowski", p = 1)
d1 <- dist(x[, 1:6])
h1 <- hclust(d1)
plot(h1)

## Cut the tree into four clusters; m1 holds one cluster id per row of x.
m1 <- cutree(h1, k = 4)

## Columns shown in the scatter plots. Other choices that were tried here:
## c(4, 6) and 4:6 (the latter gives a scatter-plot matrix).
j <- 5:6

## Left panel: true labels, right panel: clusters found by cutree().
## factor() makes the colour codes work whether `group` is stored as a
## character vector or as a factor; as.numeric() on a character column would
## give NA colours and an empty plot.
par(mfrow = c(1, 2))
plot(x[, j], col = as.numeric(factor(x$group)), main = "True Groups")
plot(x[, j], col = m1, main = "Found Clusters")
par(mfrow = c(1, 1))

## Compare Distance Measures ################################

## Dendrogram from the default (Euclidean) distance on the six measurements.
d1 <- dist(x[, 1:6])
h1 <- hclust(d1)

## Dendrogram from the Minkowski distance with exponent p = 3.
d2 <- dist(x[, 1:6], method = "minkowski", p = 3)
h2 <- hclust(d2)

## Show both trees side by side, then go back to a single-panel layout.
par(mfrow = c(1, 2))
plot(h1, main = "Euclidean Distance")
plot(h2, main = "Minkowski Distance")
par(mfrow = c(1, 1))

## Smaller Data Set #########################################

## Draw 20 rows of x at random (no seed is set, so the subset changes from
## run to run) to get dendrograms whose leaves are still readable.
xx <- x[sample(seq_len(nrow(x)), 20), ]

## Euclidean distance on the subset.
d1 <- dist(xx[, 1:6])
h1 <- hclust(d1)

## Minkowski distance with p = 1 on the same subset.
d2 <- dist(xx[, 1:6], method = "minkowski", p = 1)
h2 <- hclust(d2)

## Both dendrograms next to each other; reset the layout afterwards.
par(mfrow = c(1, 2))
plot(h1, main = "Euclidean Distance")
plot(h2, main = "Minkowski Distance")
par(mfrow = c(1, 1))

## Compare Hierarchical Methods #############################

## Fresh random subset of 20 rows (unseeded, so it differs between runs).
xx <- x[sample(seq_len(nrow(x)), 20), ]

## Columns used for the scatter plot in the third panel.
j <- 5:6

## Both linkage methods work on the same Euclidean distances. d2 is kept as
## a separate object because its name is what plot() prints below the tree.
d1 <- dist(xx[, 1:6])
d2 <- d1
h1 <- hclust(d1, method = "complete")
h2 <- hclust(d2, method = "single")

## Panels 1 and 2: the two dendrograms. Panel 3: the sampled rows, drawn as
## their row names (the points themselves are plotted in white and the
## labels are written on top).
par(mfrow = c(1, 3))
plot(h1, main = "Complete Linkage")
plot(h2, main = "Single Linkage")
plot(xx[, j], col = "white", main = "Data")
text(xx[, j], rownames(xx))
par(mfrow = c(1, 1))

## K-Means Clustering #######################################

## k-means with three centers on the six measurement columns. No seed is
## set, so the cluster numbering (and possibly the partition) can differ
## between runs.
k1 <- kmeans(x[, 1:6], 3)

## Columns shown in the scatter plots.
j <- 5:6

## Left panel: true labels, right panel: k-means assignment.
## factor() makes the colour codes work whether `group` is stored as a
## character vector or as a factor; as.numeric() on a character column would
## give NA colours and an empty plot.
par(mfrow = c(1, 2))
plot(x[, j], col = as.numeric(factor(x$group)), main = "True Groups")
plot(x[, j], col = k1$cluster, main = "Found Clusters")
par(mfrow = c(1, 1))

#############################################################

## car marks data (CarMarks.csv) ##############################

## Read the semicolon-separated file with decimal commas (read.csv2() has
## exactly these settings as defaults). From here on x holds the car data
## and replaces the bank-note data frame.
x <- read.csv2("CarMarks.csv")

## The first column becomes the row names (used later as plot labels) and
## is then removed from the data columns.
rownames(x) <- x[, 1]
x <- x[, 2:ncol(x)]

## Hierarchical Cluster Analysis ############################

## Euclidean distances on the first six columns, clustered with hclust()'s
## default linkage; the tree h1 is cut into clusters further down.
## Alternative metric to try: dist(x[, 1:6], method = "minkowski", p = 1)
d1 <- dist(x[, 1:6])
h1 <- hclust(d1)
plot(h1)

## K-Means Clustering #######################################

## Draw observations in the plane of the first two principal components,
## written as text labels coloured by cluster id.
##   scores  - matrix of component scores; columns 1 and 2 are used
##   labels  - one label per row of `scores`
##   cluster - one colour code per row of `scores`
##   main    - plot title
## The points themselves are plotted in white so that only the labels show.
plot_pc_labels <- function(scores, labels, cluster, main) {
  plot(scores[, 1], scores[, 2], col = "white",
       xlab = "PC1", ylab = "PC2", main = main)
  text(scores[, 1], scores[, 2], labels, col = cluster)
}

## Number of clusters used for both methods.
k <- 4

## Principal components of the first six columns (covariance based); they
## are only used as plotting coordinates.
auto.pc <- princomp(x[, 1:6], cor = FALSE)

## Hierarchical solution: cut the tree h1 built above into k clusters.
m1 <- cutree(h1, k)

## k-means solution with k centers (unseeded, so it can differ between runs).
k1 <- kmeans(x[, 1:6], k)

## Single plot of the k-means solution.
plot_pc_labels(auto.pc$scores, rownames(x), k1$cluster, "First 2 PCs")

## Side-by-side comparison of the hierarchical and the k-means clusters.
par(mfrow = c(1, 2))
plot_pc_labels(auto.pc$scores, rownames(x), m1,
               "First 2 PCs, Hierarchical")
plot_pc_labels(auto.pc$scores, rownames(x), k1$cluster,
               "First 2 PCs, K-Means")
par(mfrow = c(1, 1))