// -*- mode: C++; c-indent-level: 4; c-basic-offset: 4; indent-tabs-mode: nil; -*- #include #include #include using namespace std; using namespace Rcpp; // [[Rcpp::depends(RcppEigen)]] // [[Rcpp::export]] ///////////////////////////////// Calculate means of probability matrix NumericVector colMeans(NumericMatrix P_mat) { Eigen::Map P = as >(P_mat); Eigen::VectorXd means(P.cols()); means = P.colwise().mean(); return Rcpp::NumericVector(wrap(means)); } ///////////////////////////////// // [[Rcpp::export]] ///////////////////////////////// Calculate mean values of each cluster NumericMatrix calc_mu(NumericMatrix P_mat,NumericMatrix dimensionssample) { Eigen::Map P = as >(P_mat); Eigen::Map D = as >(dimensionssample); Eigen::VectorXd w(P.cols()); w = P.colwise().sum(); Eigen::MatrixXd mu = D.transpose()*P; for(int i=0;i P = as >(P_mat); Eigen::Map M = as >(mu); Eigen::Map D = as >(dimensionssample); List listOfSigmas(M.cols()); Eigen::VectorXd mean; Eigen::VectorXd P_i; for(int i=0;i p = as >(pi); Eigen::Map M = as >(mu); Eigen::Map D = as >(dimensionssample); Eigen::VectorXd mean; Eigen::MatrixXd T(D.rows(),M.cols()); double logSqrt2Pi=log(sqrt(2*M_PI)); double cons = D.cols()*logSqrt2Pi; for(int i=0;i S = as >(sigma(i)); typedef Eigen::LLT Chol; Chol chol(S); const Chol::Traits::MatrixL& L = chol.matrixL(); Eigen::VectorXd quadform = ((((L.solve(D.transpose().colwise() - mean).colwise().squaredNorm())*-0.5).array()-cons).array().exp()).array()/L.determinant(); T.col(i) = quadform.array() * p(i); } return Rcpp::NumericMatrix(wrap(T)); } ///////////////////////////////// // [[Rcpp::export]] ///////////////////////////////// Calculate log-likelihood double calc_loglik(NumericMatrix T) { Eigen::Map T_mat = as >(T); Eigen::VectorXd lse = T_mat.array().rowwise().sum().array().log(); return lse.sum(); } ///////////////////////////////// // [[Rcpp::export]] ///////////////////////////////// Calculate means of probability matrix NumericMatrix calc_Pmat(NumericMatrix T) { Eigen::Map T_mat = as >(T); Eigen::MatrixXd T_new = T_mat.array(); for(int i=0;i diff.ll) { if(iterations==1){ pi<-colMeans(P_mat) start<-dimensionssample[sample(nrow(dimensionssample),size=c,replace=FALSE),] mu<-t(start) sigma<-calc_sigma(P_mat,mu,dimensionssample) T<-calc_T(pi,mu,sigma,dimensionssample) loglik[iterations+1] <- calc_loglik(T) #compute log likelihood ll[[c]][counter]<-loglik[iterations+1] if(iterations >= 2){ diff.tmp <- abs(loglik[iterations+1]-loglik[iterations]) } P_mat<-calc_Pmat(T) loglikelihood=loglik[iterations+1] it<-iterations iterations<-iterations+1 counter<-counter+1 }else{ ################## M-step pi<-colMeans(P_mat) mu<-calc_mu(P_mat,dimensionssample) sigma<-calc_sigma(P_mat,mu,dimensionssample) T<-calc_T(pi,mu,sigma,dimensionssample) loglik[iterations+1] <- calc_loglik(T) #compute log likelihood ll[[c]][counter]<- loglik[iterations+1] if(iterations >= 2){ diff.tmp <- abs(loglik[iterations+1]-loglik[iterations]) } P_mat<-calc_Pmat(T) loglikelihood=loglik[iterations+1] it<-iterations iterations<-iterations+1 counter<-counter+1 } } }else if(prior==TRUE){ number_of_inits<-max_inits loglik<- c() loglik[1]<-0 iterations<-1 diff.tmp <- 1000 while(diff.tmp > diff.ll) { print(iterations) if(iterations==1){ pi<-pi_prior[[c]] mu<-mu_prior[[c]] sigma<-sigma_prior[[c]] T<-calc_T(pi,mu,sigma,dimensionssample) loglik[iterations+1] <- calc_loglik(T) #compute log likelihood ll[[c]][counter]<-loglik[iterations+1] P_mat<-calc_Pmat(T) loglikelihood=loglik[iterations+1] it<-iterations iterations<-iterations+1 counter<-counter+1 }else{ pi<-colMeans(P_mat) mu<-calc_mu(P_mat,dimensionssample) sigma<-calc_sigma(P_mat,mu,dimensionssample) T<-calc_T(pi,mu,sigma,dimensionssample) loglik[iterations+1] <- calc_loglik(T) #compute log likelihood ll[[c]][counter]<-loglik[iterations+1] diff.tmp <- abs(loglik[iterations+1]-loglik[iterations]) P_mat<-calc_Pmat(T) loglikelihood=loglik[iterations+1] it<-iterations iterations<-iterations+1 counter<-counter+1 } } } if(number_of_inits==1 && max_inits >=2){ print("First initialization.") act_T[[c]]<-T act_P_mat[[c]]<-P_mat act_pi[[c]]<-pi pis[[number_of_inits]]<-act_pi[[c]] act_mu[[c]]<-mu mus[[number_of_inits]]<-act_mu[[c]] act_sigma[[c]]<-sigma sigmas[[number_of_inits]]<-act_sigma[[c]] act_loglik[[c]]<-loglikelihood act_iterations[[c]]<-it print(paste0(it, " iterations.")) number_of_inits<-number_of_inits+1}else if(number_of_inits>=2 && number_of_initsact_loglik[[c]]){ act_T[[c]]<-T act_P_mat[[c]]<-P_mat act_pi[[c]]<-pi act_mu[[c]]<-mu act_sigma[[c]]<-sigma act_loglik[[c]]<-loglikelihood act_iterations[[c]]<-it print(paste0(it, " iterations.")) }else{ print(paste0(it, " iterations.")) } number_of_inits<-number_of_inits+1}else if(number_of_inits==max_inits){ if(prior==TRUE || max_inits==1){ print("First initialization.") act_T[[c]]<-T act_P_mat[[c]]<-P_mat act_pi[[c]]<-pi act_mu[[c]]<-mu act_sigma[[c]]<-sigma act_loglik[[c]]<-loglikelihood act_iterations[[c]]<-it print(paste0(it, " iterations.")) }else{ print(paste0(number_of_inits, ". initialization")) if(loglikelihood>act_loglik[[c]]){ act_T[[c]]<-T act_P_mat[[c]]<-P_mat act_pi[[c]]<-pi act_mu[[c]]<-mu act_sigma[[c]]<-sigma act_loglik[[c]]<-loglikelihood act_iterations[[c]]<-it print(paste0(it, " iterations.")) }else{ act_T[[c]]<-act_T[[c]] act_P_mat[[c]]<-act_P_mat[[c]] act_pi[[c]]<-act_pi[[c]] act_mu[[c]]<-act_mu[[c]] act_sigma[[c]]<-act_sigma[[c]] act_loglik[[c]]<-act_loglik[[c]] act_iterations[[c]]<-act_iterations[[c]] print(paste0(it, " iterations.")) }} for(k in 1:c){ maxTvec[k]<-max(act_T[[c]][,k]) } ################ BIC[c]<- ((c*6-1)*log(nrow(dimensionssample)))+2*act_loglik[[c]] color_cluster_matrix<-matrix(ncol = 1,nrow=length(act_P_mat[[c]][,1])) for(i in 1:length(act_P_mat[[c]][,1])){ max<-which.max(act_P_mat[[c]][i,]) if(separation==TRUE){ if(max(sqrt(act_sigma[[c]][[max]][1,1]),sqrt(act_sigma[[c]][[max]][2,2]))=alpha*(1 - act_pi[[c]][max])*maxTvec[max]){ color_cluster_matrix[i,1]<-palette[max] } else { color_cluster_matrix[i,1]<-palette_des[max] #color_cluster_matrix[i,1]<-"black" }} else { color_cluster_matrix[i,1]<-"gray73" } }else{ color_cluster_matrix[i,1]<-palette[max] } } colnames(color_cluster_matrix)="color_cluster" plot_matrix<-cbind(dimensionssample,color_cluster_matrix) t<-table(color_cluster_matrix) t_cells<-t[seq(1, length(t), by = 2)] if(separation==TRUE){ print(paste0("Found ",length(t_cells)-1," real cell clusters. ", "Background: ",c - (length(t_cells)-1)," of ", c, " clusters.")) l<-length(t)-1 l_cells<-length(t_cells)-1 names<-rownames(t) sum_foreground<-sum(t[1:l]) sum_foregroundcells<-sum(t_cells[1:l_cells]) sum_background<-t[length(t)] print(paste0("Foreground: ", sum_foreground, " events (", round(sum_foreground*100/n,digits=3)," %) of ", n , " total events.")) print(paste0("Background: ", sum_background, " events (", round(sum_background*100/n,digits=3)," %) of ", n , " total events.")) for(i in 1:l_cells){ print(paste0("Cluster ", i , " : ", t_cells[i], " events(", round(t_cells[i]*100/sum_foreground,digits=3)," %) of ", sum_foreground, " foreground events.")) } print(paste0("Total cell events: ", sum_foregroundcells, " events (", round(sum_foregroundcells*100/sum_foreground,digits=3)," %) of ", sum_foreground , " total foreground events.")) colors<-character() a<-1 for (i in names(t_cells)){ if(i=="gray73"){ colors[a]<-"Background" }else{ colors[a]<-paste0("Cluster ", a) a<-a+1 } } if(img_format=="png"){ png(file=paste0(c,"_clusterssample_separation.png"),bg="white",width = 12, height = 12, units = 'in', res = 300) }else if(img_format=="svg"){ svg(filename=paste0(c,"_clusterssample_separation.svg"),width = 11, height = 11, pointsize = 12, bg = "white") } if(use_log==TRUE){ plot(x=plot_matrix[,1], y=plot_matrix[,2], col=plot_matrix[,3],pch=19,cex=.6,yaxt="n",xaxt="n",log="xy",xlab=ch1,ylab=ch2,xlim=c(min(mFSC),max(mFSC)),ylim=c(min(mFL),max(mFL)),font.lab=2,font=2,family='sans',cex.lab=1.6,cex.axis=1.6) axis(1,at=c(1,10,100,1000,10000), labels=c(expression(paste("10"^"0")),expression(paste("10"^"1")),expression(paste("10"^"2")),expression(paste("10"^"3")),expression(paste("10"^"4")))) axis(2,at=c(1,10,100,1000,10000), labels=c(expression(paste("10"^"0")),expression(paste("10"^"1")),expression(paste("10"^"2")),expression(paste("10"^"3")),expression(paste("10"^"4")))) for(j in 1:c){ if(max(sqrt(act_sigma[[c]][[j]][1,1]),sqrt(act_sigma[[c]][[j]][2,2]))2 && abs(newList$BIC[i] - newList$BIC[i-1]<40)){ print(paste0("Most apropriate number of clusters: ", i-1)) diff=TRUE } } } if(verbose) return (newList) } */