## FDR calculation based on randomization distribution. args <- commandArgs(trailingOnly=TRUE) if(length(args) < 2){ stop("\n---------------------------\n Usage: Rscript 1.2.FDR.r randomization_matrix\tmaximum_num_for_FDR\tcut_to_obtain_significant_category\n randomization_matrix: matrix generated from randomization\n maximum_num_for_FDR: the maximum number of false positives in randomization to consider to calculate FDR\n [Optional] cut_to_obtain_significant_category : the cut_off you give based on the FDR calculation \n---------------------------\n") } input.file <- args[1] false.num <- as.numeric(args[2]) randomization <- read.table(input.file,row.names=1) cate.name <- rownames(randomization) observed.value <- randomization[,3] random.num = dim(randomization)[2] if (false.num >= (random.num-3)){ stop("maximum number of false positives should be smaller than the number of randomizations...\n") } if (is.na(args[3]) == FALSE){ #### get categories #### select = as.numeric(args[3]) sig.cate = c() for (i in 1:length(cate.name)){ random.values = sort(randomization[cate.name[i],4:random.num],decreasing=T) number=select cut.off = random.values[number+1] value = observed.value[i] if (value > cut.off){ sig.cate = c(sig.cate,cate.name[i]) } } data = randomization[sig.cate,1:3] data.order = data[order(data[,3],decreasing=T),] colnames(data.order)=c("rare","overall","rare/overall") write.table(data.order,file="random/significant.categories",row.names=T,sep="\t",quote=F,col.names=NA) }else{ ### calculate empirical FDR #### R_t = c(rep(0,false.num)) FDR = c(rep(0,false.num)) for (i in 1:length(cate.name)){ random.values = sort(randomization[cate.name[i],4:random.num],decreasing=T) for (j in 1:false.num){ number = j cut.off = random.values[number+1] value = observed.value[i] if (value > cut.off){ R_t[j] = R_t[j] + 1 } } } for (j in 1:false.num){ if (R_t[j] >0){ FDR[j] = (j * length(cate.name)/(random.num-3))/R_t[j] }else{ FDR[j] = 0 } } pdf(file=paste("random/FDR.pdf",sep="")) plot((1:false.num), FDR,xlab="Cut-offs", ylab="FDR",pch=15,col="red",cex=1.2) dev.off() data = cbind(matrix(1:false.num,ncol=1),matrix(FDR,ncol=1)) colnames(data)=c("cut_off","FDR") write.table(data,file = "random/FDR.cut.off",row.names=F,col.names=T,sep="\t",quote=F) }