ksmerge <- function(cg1,cg2,bg,j,size) {x <- c(sample(cg1,(size*j),replace=TRUE),sample(cg2,size*(1-j),replace=TRUE)) D <- ks.test(x,bg)$statistic return(D)} tertmerge <- function(cg1,cg2,bg,j,samps,chimax,...) { breaks <- c(min(c(cg1,cg2,bg)-1),quantile(bg,c(.33,.67)),max(c(cg1,cg2,bg)+1)) if (length(breaks) != length(unique(breaks))) breaks <- jitter(breaks,.001) cutbg <- cut(bg,breaks,labels=F) if (samps != 1) { cg1 <- sample(cg1,length(cg1),replace=TRUE) cg2 <- sample(cg2,length(cg2),replace=TRUE)} cutc1 <- cut(cg1,breaks,labels=F) cutc2 <- cut(cg2,breaks,labels=F) pcon <- j*tabulate(cutc1,nbins=3)+(1-j)*tabulate(cutc2,nbins=3) d <- chisq.test(tabulate(cutbg,nbins=3),p=pcon,rescale=T)$statistic if (is.nan(d)) d <- chimax if (d > chimax) d <- chimax return(d) } mud <- function(cg1,cg2,bg, precision=.1,samps=1,size=10000, CI=.95,method=1,chimax=100,critp = 1,...) {warnst <- options("warn") if (size > length(bg) && samps != 1) print("The CIs are not reliable") options("warn" = -1) outstats <- {} for (j in seq(0,1,precision)) {Dj <- {} for (i in 1:samps){ if (method == 1) Dj <- c(Dj,ksmerge(cg1,cg2,bg,j,size)) if (method == 2) Dj <- c(Dj,tertmerge(cg1,cg2,bg,j,samps,chimax))} forlevel <- c(j,mean(Dj),quantile(Dj,c((1-CI)/2,(1+CI)/2))) outstats <- rbind(outstats,forlevel)} if (method == 1) {plot(outstats[,1],outstats[,2],ylim=c(0,1),ylab="D statistic", xlab="Proportion from group 1")} if (method == 2) {plot(outstats[,1],outstats[,2],ylim=c(0,chimax+1),ylab="Chi-sq statistic", xlab="Proportion from group 1")} lines (spline(outstats[,1], outstats[,2])) lines(spline(outstats[,1], outstats[,3]),lty=2) lines(spline(outstats[,1], outstats[,4]),lty=2) minD <- min(outstats[,2]) minnumber <- which.min(outstats[,2]) minprop <- format(outstats[minnumber,1],digits = 4) if (method == 1) {title(main=paste("min D:",format(minD,digits=2), "; pr = ",format(minprop,digits=2)))} if (method == 2) {title(main=paste("min X2:",format(minD,digits=2), "; pr = ",format(minprop,digits=2)))} if (method == 1) {critD <- 1.36/sqrt((length(bg)))} if (method == 2) {critD <- 5.99} if (critp == 2) {x <- mudp(cg1,cg2,bg,as.numeric(minprop),method=as.numeric(method)) critD <- x[2]} if (critp != 3) {abline(h=critD,lty=4)} lines(c(minprop,minprop),c(-.1,minD),lty=4) print(c(minD,minprop)) options("warn" = as.numeric(warnst)) return(outstats) } mud2 <- function(cg1,cg2,bg, precision=.1,size=1000, CI=.95,method=1,chimax=100,...) {outstats <- {} for (j in seq(0,1,precision)) { if (method == 1) Dj <- as.numeric(ksmerge(cg1,cg2,bg,j,size)) if (method == 2) Dj <- as.numeric(tertmerge(cg1,cg2,bg,j,1,chimax)) outstats <- c(outstats,Dj)} return(min(outstats))} mudp <- function(cg1,cg2,bg,prop,replic=1000,method=1,...) {teststats <- {} for (i in 1:replic){ mix <- c(sample(cg1,length(bg)*prop,replace=T), sample(cg2,(length(bg)*(1-prop)),replace=T)) x1 <- mud2(cg1,cg2,mix,method=method) teststats <- c(teststats,x1)} outp <- quantile(teststats,c(.9,.95,.98,.99)) return(outp)} library("Hmisc",warn.conflicts=F) mudsolution <- function(cg1,cg2,bg,prop,size=1000,hist=F) { x <- c(sample(cg1,size*prop,replace=TRUE),sample(cg2,size*(1-prop),replace=TRUE)) warnst <- options("warn") options("warn" = -1) D <- format(ks.test(x,bg)$statistic,digits=3) new <- c(x,bg) y <- c(rep("1&2",length(x)),rep("3",length(bg))) ecdf(new, group=y,lty=c(1,2), label.curves=list(keys="lines"),xlab="Variable",ylab="Cum. Frequency",subtitles=F) if (hist==T){ par(mfrow=c(2,2)) hist(cg1,main="Control 1") hist(cg2,main="Control 2") hist(x,main="Blended") hist(bg,main="Experimental") par(mfrow=c(1,1))} options("warn" = as.numeric(warnst)) return(D) }