0
phyper機能を使用して濃縮分析を実行しようとしています。Rからです。私が書いたコードは、私に正確な結果を与えることですが、行列のサイズが大きくなると永遠にかかります。以下は、621 * 1860マトリックスの再現可能な例です。しかし、行列のサイズが6210 x 24000に増加すると、複数のコア上で実行してもほぼ一日かかることになります。私は同じものを最適化する方法があるのだろうかと思っています。最適化された濃縮分析の方法
コメントにある共有可能なリンクからRObjectsをダウンロードしてください。
## Main Functions
GetEnrichedAnnotations <- function(DrugDiseaseName,
DrugDiseaseGeneMatrix,
DrugDiseaseFeatureMatrix,
DFDrgDis){ ## Function Begins
TotalGenesCount = nrow(DrugDiseaseGeneMatrix)
## Get the assosciated Genes for each Drug or Disease
DrugDiseaseGenes = GetGeneList(DrugDiseaseName,DFDrgDis)
## Get the only annotations that Genes from the Drug or Disease List
UPDNAnnotations = DrugDiseaseFeatureMatrix[DrugDiseaseName,]
UPDNAnnotations = UPDNAnnotations[UPDNAnnotations > 0]
## First value to the HyperGeometricFunction phyper
GenesFromInput = DrugDiseaseFeatureMatrix[DrugDiseaseName,names(UPDNAnnotations)]
## Second value to the HyperGeometricFunction phyper
GenesinAnnotation = DrugDiseaseGeneMatrix[,names(UPDNAnnotations)]
GenesinAnnotation = apply(GenesinAnnotation,2,sum)
## Third Value to the HyperGeometricFunction phyper
TotalGenes = rep(TotalGenesCount,length(GenesFromInput))
RemainingGenes = TotalGenes - GenesinAnnotation
## Fourth value to the HyperGeometricFunction phyper
NumberOfGenesInDrug = rep(length(DrugDiseaseGenes),length(GenesFromInput))
names(NumberOfGenesInDrug) = names(GenesFromInput)
## Apply Enrichment ANalysis
PValues = phyper(GenesFromInput-1,GenesinAnnotation,RemainingGenes,NumberOfGenesInDrug,lower.tail = FALSE)
AdjustedPvalues = p.adjust(PValues,method = "BH")
EnrichedAnnotations = AdjustedPvalues[AdjustedPvalues <= 0.05]
### When P value is zero, replacing zeros with the minimum value
EnrichedAnnotations[EnrichedAnnotations == 0] = 2.2e-16
EnrichedAnnotations = EnrichedAnnotations[EnrichedAnnotations <= 0.05]
## Get the log value for the adjusted Pvalues
EnrichedAnnotations = -log(EnrichedAnnotations,2)
## This vector consists of all the annotations including Enriched Annotations
TotalAnnotaionsVector = rep(0,ncol(DrugDiseaseGeneMatrix))
names(TotalAnnotaionsVector) = colnames(DrugDiseaseGeneMatrix)
TotalAnnotaionsVector[names(EnrichedAnnotations)] = EnrichedAnnotations
return(TotalAnnotaionsVector)
}##Function Ends
## Get GeneList for a given Diseases
GetGeneList = function(DiseaseName,DFDrgDis){
GeneList = DFDrgDis[DFDrgDis$DrugName == DiseaseName,"Symbol"]
GeneList = unlist(strsplit(GeneList,","))
GeneList = trimws(GeneList)
GeneList = unique(GeneList)
return(GeneList)
}
## Parraleize the Code
numberofCores = parallel::detectCores() - 1
### Closing all the Existing Connections
closeAllConnections()
### Making a Cluster with 8 of 8 available cores
Cluster <- parallel::makeCluster(numberofCores)
### Register the Cluster
doParallel::registerDoParallel(Cluster)
## Please download the RObject from below link
## Please download the three objects for reproducible example
## https://drive.google.com/drive/folders/0Bz9Y4BgZAF7oS2dtVVEwN0Z1Tnc?usp=sharing
GeneAnnotations = readRDS("Desktop/StackOverFlow/GeneAnnotations.rds")
DiseaseAnnotations = readRDS("Desktop/StackOverFlow/DiseaseAnnotations.rds")
Diseases = readRDS("Desktop/StackOverFlow/Diseases.rds")
## Get the Unique Names of Disease List
DisNames = row.names(DiseaseAnnotations)
## Below Function runs the code on parallel to get the Enriched Annotations for Multiple Drugs or Diseases
## Get the Enriched Annotaions for all the Diseases UP Regulated EnricR Genes
library(foreach)
EnrichedAnnotations <- foreach(i=1:length(DisNames), .export= c('GetGeneList'),.packages = "Matrix") %dopar% {
GetEnrichedAnnotations(DisNames[i],GeneAnnotations,DiseaseAnnotations,Diseases)
}
## Convert to Matrix
EnrichedAnnotations <- do.call("cbind",EnrichedAnnotations)
EnrichedAnnotations = t(EnrichedAnnotations)
colnames(EnrichedAnnotations) = colnames(EnrichedAnnotations)
rownames(EnrichedAnnotations) = DisNames
## Stop the Cluster
parallel::stopCluster(Cluster)
を事前計算は、この場合には、外出先ではありませんか? –
TopGOは、上記のスクリプトよりも豊富な行列を生成するのに時間がかかります。 –
私の気持ちは、カスタム関数を修正してメモリにデータを保持し、テストしたい疾患を反復するようにすれば、少しスピードアップできるということです。簡単に言うと、一度に1 DrugDiseaseNameを渡すのではなく、ベクトルを渡して関数が反復する...私はこれを後で調べますが、これは非常に興味深い質問だと思います。 –