2017-10-10 22 views
-1

私は自分のRコードを微調整することができるリソースに誰かを導くことができるかどうか調べています。私はRを使って同じ変数を持つ多くのファイルを分析しています。最後に、すべての出力テーブルを追加したいと思います。私は12テーブルあり、出力テーブルは1つしかありません。どのようにproeceedするかについての任意のアイデア。 要約すると、 forループを使用して複数の出力がある場合、ファイルを追加して一意のファイルを作成する方法。複数のファイルから複数の出力を追加するにはどうすればよいですか?Rのループを使用して分析する

setwd("C:/Data/Key/Spring2017/era"); 

subjects=c("ALGEBRA1","BIOLOGY","LITERATURE"); 

modules=c("module1","module2"); 

    `modes=c("O","P");` 

#loopcount<-1; 



for(i in 1:length(subjects)){ 

    for(k in 1:length(modes)){ 

     for(m in 1:length(modules)){ 

    #i=1; 
    #k=1; 
    #m=1 

        subj <- subjects[i]; 
        mode <- modes[k]; 
        module <- modules[m]; 

     cat("Subj:",subj,",mode: ",mode,",module: ",module,"\n"); 

     ###Reading the analyses output; 

     outfile<- 
paste("output/erasure_Mean_",mode,"_",module,"_",subj,".csv",sep=""); 


    ###Reading in the datafiles; 

    infile<-paste("data/erasure_",mode,"_",module,"_",subj,".csv",sep=""); 


    dat<-read.csv(infile,header=TRUE,as.is=T); 


    newid<-paste(dat[,"dis"],dat[,"sch"]); 

    dat<-data.frame(dat,newid); 

    head(dat); 


    count<-function(dat){ 
         length(na.omit(dat)) 
         } 

    ##computing students count per school for Algebra1; 

    temp<-aggregate(dat[,"subj"],by=list(dat[,"newid"]),FUN="count") 
    colnames(temp)=c("newid","N") 

    #removing duplicate id; 
#y<-dat[!duplicated(dat[,"newid"]),c("newid","dis","disname","schname","drcid","subj","mode","module")] 
y<-dat[!duplicated(dat[,"newid"]),c(1,2,3,4,7:9,15)] 

head(y) 

MinN<-10; 
out<-merge(y,temp,by="newid") 

head(out) 


#count of the students with FivePlus above; 
temp<-aggregate(dat[,"FivePlus"],list(dat[,"newid"]),FUN=sum) 
colnames(temp)<-c("newid","FivePlusN"); 
out<-merge(out,temp,by="newid") 
temp<-aggregate(dat[,"FivePlus"],list(dat[,"newid"]),FUN=mean); 
colnames(temp)<-c("newid","FivePlusPer"); 
temp[,2]=temp[,2]*100 
out<-merge(out,temp,by="newid"); 

#state erasure mean 
gmean<-mean(dat[,"tot_wr"]) 
gsd<-sd(dat[,"tot_wr"]) 
gn<-nrow(dat) 
varused<-c("tot_wr"); 
x<-dat[!duplicated(dat[,"subj"]),c(7:9)] 
pre.x<-data.frame(x,varused,gmean,gsd,gn) 

##Statistics for wr; 
#mean of wr by school 

temp<-aggregate(dat[,"tot_wr"],list(dat[,"newid"]),FUN=mean) 
colnames(temp)<-c("newid",paste("tot_wr","mean",sep=".")) 
head(temp) 

#mean of wr per test 
pertest<-temp[,2] 
pertest<-as.matrix(pertest,ncol=1) 
colnames(pertest)<-c(paste("tot_wr","pertest",sep=".")) 
temp<-data.frame(temp,pertest) 
out<-merge(out,temp,by="newid") 

#variance of WR 
temp<-aggregate(dat[,"tot_wr"], list(dat[,"newid"]),sd) 

#The standard deviation used is across item types by school 
colnames(temp)<-c("newid",paste("tot_wr", "sd",sep=".")) 
out<-merge(out,temp,by="newid") 

#z score of wr 
Z<-(out[,"tot_wr.pertest"]-gmean)/(out[,"tot_wr.sd"]/sqrt(out[,"N"])) 
out<-data.frame(out,Z) 

##ncol(out) determines the column to rename using paste function. 
colnames(out)[ncol(out)]<-paste("tot_wr","Z",sep=".") 

##p value of wr 
tdf<-out[,"N"]-1 

##lower.tail logical if True, prob are P[X<=x],otherwise P[X>x] 
##log.p if true, probabilities p are given as log(p) 
pval<-pt(Z,tdf,lower.tail=F,log.p=FALSE) 
out<-data.frame(out,pval) 
colnames(out)[ncol(out)]<-paste("tot_wr","pval",sep=".") 

#threat for wr 
threat<-matrix(0,ncol=1,nrow=nrow(out)) 
prethreat<-as.matrix(round(abs(1.086*log(pval/(1-pval))),digits=4),ncol=1, nrow=nrow(out)) 


for(threatloop in 1:nrow(out)){ 
    if (out[threatloop, paste("tot_wr","pval",sep=".")]< 0.5 & 
    is.na(out[threatloop,paste("tot_wr","pval",sep=".")])==F){ 
    threat[threatloop,]<-prethreat[threatloop,] 
    } 
    } 

threat <- as.matrix(threat,ncol=1) 
if(length(threat[which(threat[,1]> 49.9),1])>0){ 
    threat[which(threat[,1]> 49.9),1]<-49.9 
    } 

colnames(threat)<-paste("tot_wr","threat",sep=".") 
out<-cbind(out,threat) 

if (length(which(out[,paste("tot_wr","threat",sep=".")] > 9.9 
      & out[,paste("tot_wr","threat",sep=".")] < 10))> 0){ 
      out[which(out[,paste("tot_wr","threat",sep=".")]< 10),paste("tot_wr","threat",sep=".")] < -9.9 
    } 


if (length(which(out[,"N"]< MinN))> 0){ 
    out[which(out[,"N"]< MinN),paste("tot_wr","threat",sep=".")]<- NA 

    } 

if(length(which(out[,paste("tot_wr","Z",sep=".")]=="-Inf")) > 0){ 
    out[which(out[,paste("tot_wr","Z",sep=".")]=="-Inf"),paste("tot_wr","Z",sep=".")]<- -999999 
} 

if (length(which(out[,paste("tot_wr","Z",sep=".")]=="Inf")) > 0){ 
    out[which(out[,paste("tot_wr","Z",sep=".")]=="Inf"),paste("tot_wr","Z",sep=".")] <- 999999 
} 

eras<-rbind(pre.x,pre.x1,pre.x2,pre.x3) 

write.table(out,outfile,quote=F,append=F,row.names=F,col.name=T,na= "",sep=",") 



     } 

    } 

} 
+0

再現性のあるデータを提供し、質問したい質問をターゲットにするコードを簡素化するのに役立つかもしれません。 – RealViaCauchy

+0

コードを実行できないようにデータがありません。また、問題の内容を正確に伝えるのは困難です。あなたが持っている問題だけを示す単純な再現可能な例に煮詰めてください。 – Spacedman

答えて

0

機能を使用してプロセスを一般化を検討し、すべての可能性の組み合わせのexpand.gridデータフレームを通じて1回の繰り返しで、ネストされたforループを置き換える: 以下のコードを参照してください。次に、mapplyまたはそのラッパーMapを使用して、汎用関数にパラメータを渡します。

# DATAFRAME OF ALL POSSIBLE COMBINATIONS OF NESTED for LOOP 
loopdf <- expand.grid(mode = c("O","P"), 
         module = c("module1","module2"), 
         subj = c("ALGEBRA1","BIOLOGY","LITERATURE")) 

# USER-DEFINED FUNCTION nearly same code but two changes at beginning and end: 
# 1. Remove assignment of mode, module, subj since they are passed in as parameters 
# 2. Replace write.table with a return() since you will output file outside of function 
table_process <- function(mode, module, subj){  

    cat("Subj:",subj,",mode: ",mode,",module: ",module,"\n") 

    infile<-paste("data/erasure_",mode,"_",module,"_",subj,".csv",sep="") 
    # ... EXACT SAME CODE EXCEPT LAST LINE 

    return(out) 
} 

# LIST OF DATAFRAMES 
dfList <- Map(table_process, loopdf$mode, loopdf$module, loopdf$subj) 
# EQUIVALENTLY 
# dfList <- mapply(table_process, loopdf$mode, loopdf$module, loopdf$subj, SIMPLIFY = FALSE) 

# ROW BIND ALL DF ELEMENTS INTO ONE DATAFRAME (ASSUMED SAME COLUMN LENGTH AND NAMES) 
finaldf <- do.call(rbind, dfList) 

# OUTPUT SINGLE FILE 
write.table(finaldf, "final.csv", quote=F, append=F, row.names=F, col.name=T, na= "", sep=",")  
# EQUIVALENTLY WITHOUT sep ARG 
# write.csv(finaldf, "final.csv", quote=F, append=F, row.names=F, col.name=T, na= "") 
関連する問題