# dc.R # T. J. Finney, 2010-08-02. message("Divisive Clustering.") # Reset. rm(list=ls(all=TRUE)) # Set parameters. message("Distance matrix (as CSV file): ", dist.path <- "../dist/conf.15.SMD.csv") message("Output directory for DC result: ", dc.dir <- "../dc/") # Set writing mode: [1] write results; [2] do not write results. message("Write mode: ", write <- c(TRUE, FALSE)[1]) # Script # Make names. counts.path <- sub(".SMD.csv", ".counts.csv", dist.path, fixed=TRUE) #counts.path <- sub(".csv", ".counts.csv", dist.path, fixed=TRUE) steps <- unlist(strsplit(dist.path, "/")) dc.path <- sub(".csv", ".png", paste(dc.dir, steps[length(steps)], sep=""), fixed=TRUE) # Read and check inputs. dist.frame <- read.csv(dist.path, row.names=1) colnames(dist.frame) <- rownames(dist.frame) counts.frame <- read.csv(counts.path, row.names=1) stopifnot(rownames(dist.frame) == rownames(counts.frame)) # Prepare to produce results. # Convert distance matrix to a distance object. dist <- as.dist(dist.frame) # Shut down open graphics devices. graphics.off() # Perform DC analysis. require(cluster) DC <- diana(dist) message("Divisive coefficient: ", round(DC$dc, digits = 2)) par(bg="white") plot(DC, which=2, main="", cex=0.6) # Cut tree at upper critical distance. n <- round(mean(counts.frame[[1]])) p <- mean(dist) upper <- qbinom(0.975, n, p)/n message("Cut tree at upper critical distance of ", round(upper, digits=3)) print(cutree(as.hclust(DC), h=upper)) # Write results. if (write) { message("Write: ", dc.path) dev.print(png, file=dc.path, width=1000, height=600) } else { message("Would write: ", dc.path) }