2016-10-12 7 views
-1

R でしきい値に基づいてテーブルを構築することができず、この問題に困っています。groupby(set_a, set_b) ごとの rank_of_values が .80 より大きいかどうかに基づいて value を選択し(同時に同じ paste を利用して)、テーブルを作成したいと考えています。私の基準は、最大の rank_of_values を選択し、条件が真の場合はその value をテーブルに返すことです。条件が偽の場合は、1 番目と 2 番目の両方について、rank_of_values に対応する value をテーブルに追加します。set_b にグループが 1 つしかないときは以前に解決できましたが、今はグループが複数あるため問題が発生しています。警告(長さに関するエラー)も理解しようとしましたが、これまでのところうまくいかず困っています。どんな助けやアドバイスにも感謝します。(タイトル: R でしきい値を使用してサブセットを作成し、for ループを使用して隣接する行を返す)

library(data.table) 


# Sample data: three set_a groups ("a", "b", "c") crossed with three set_b
# colours, four observations per combination (36 rows in total).
set_a <- rep(rep(c("a", "b", "c"), each = 4), times = 3)
set_b <- rep(c("red", "blue", "green"), each = 12)

# Fixed values keep the example reproducible. Disabled random alternative:
# value <- c(sample(1:100, size = 36, replace = T))
value <- c(
  19, 15, 3, 62, 61, 17, 2, 31, 16, 511, 2, 64,
  81, 51, 58, 94, 81, 79, 23, 35, 9, 40, 54, 78,
  78, 56, 11, 58, 99, 74, 36, 58, 5, 47, 39, 98
)

# Assemble as a data.frame first, then convert to a data.table.
a <- data.frame(set_a, set_b, value)
a <- data.table(a)

# rank_of_values is each value's share of the total of its
# (set_a, set_b) group, i.e. value / sum(value) within the group.
a <- a[, rank_of_values := value / sum(value), by = .(set_a, set_b)]
head(a)


# Question's attempt: for every (set_a, set_b) group, append two rows
# (labelled "<set_b>_a" and "<set_b>_b") to the character matrix mn0.
# NOTE(review): this is the version that produces the warnings and the
# unsorted output; the comments below mark the lines responsible.
mn <- c() #scratch holder for a single 1 x 3 row
mn0 <- c() #accumulated result matrix
colu = unique(as.character(a$set_a)) 
colu2 = unique(as.character(a$set_b)) 

for (i in seq_along(colu)) 
{ 
    #subset the data table for the set_a: 
    t = subset(a, set_a == colu[i]) 

    # NOTE(review): the inner loop reuses the name `i`. The outer `i` is only
    # read above, before this loop starts, but every `i` below is the
    # position within colu2, not within colu.
    for (i in seq_along(colu2)){ 
    t2 = subset(t, set_b == colu2[i]) 
    #subsetting the data by the set_b 
    # NOTE(review): t2$rank_of_values holds one entry per row of t2, so this
    # condition is a vector; R warns and tests only the first element
    # (newer R versions raise an error here instead).
    if(t2$rank_of_values > .8){ 
     # NOTE(review): `i` (the set_b position) is used as a row index into t2.
     mn <- cbind(as.character(t2$set_a[i]),paste0(t2$set_b[i],"_a"),t2$value[i]) 
     # NOTE(review): rbind(mn, mn0) puts the new row on top, unlike the
     # other rbind calls in this loop, which append at the bottom.
     mn0 <- rbind(mn,mn0) 
     mn2 <- cbind(as.character(t2$set_a[i]),paste0(t2$set_b[i],"_b"),t2$value[i]) 
     mn0 <- rbind(mn0,mn2) 

    } 
    else 
    { 
     # NOTE(review): the ordered copy built here is never assigned, so t2
     # keeps its original row order and rows 1 and 2 below are simply the
     # first two rows of the group, not the two largest shares.
     t2[order(-rank_of_values)][,.SD[1:2]] #create a second data table to select for 
     #order the data.table and 

     # first row of the group, labelled "<set_b>_a"
     mn3 <- cbind(as.character(t2$set_a[1]),paste0(t2$set_b[1],"_a"),t2$value[1]) 

     mn0 <- rbind(mn0,mn3) 
     # second row of the group, labelled "<set_b>_b"
     mn4 <- cbind(as.character(t2$set_a[2]),paste0(t2$set_b[2],"_b"),t2$value[2]) 
     mn0 <- rbind(mn0,mn4) 
    } 
    } 
} 

# print the accumulated matrix
mn0 

現在の結果:

 [,1] [,2]  [,3] 
[1,] "a" "red_a" "19" 
[2,] "a" "red_b" "15" 
[3,] "a" "blue_a" "81" 
[4,] "a" "blue_b" "51" 
[5,] "a" "green_a" "78" 
[6,] "a" "green_b" "56" 
[7,] "b" "red_a" "61" 
[8,] "b" "red_b" "17" 
[9,] "b" "blue_a" "81" 
[10,] "b" "blue_b" "79" 
[11,] "b" "green_a" "99" 
[12,] "b" "green_b" "74" 
[13,] "c" "red_a" "16" 
[14,] "c" "red_b" "511" 
[15,] "c" "blue_a" "9" 
[16,] "c" "blue_b" "40" 
[17,] "c" "green_a" "5" 
[18,] "c" "green_b" "47" 

望ましい結果:

 [,1] [,2]  [,3] 
[1,] "a" "red_a" "62" 
[2,] "a" "red_b" "19" 
[3,] "a" "blue_a" "94" 
[4,] "a" "blue_b" "81" 
[5,] "a" "green_a" "78" 
[6,] "a" "green_b" "58" 
[7,] "b" "red_a" "61" 
[8,] "b" "red_b" "31" 
[9,] "b" "blue_a" "81" 
[10,] "b" "blue_b" "79" 
[11,] "b" "green_a" "99" 
[12,] "b" "green_b" "74" 
[13,] "c" "red_a" "511" 
[14,] "c" "red_b" "64" 
[15,] "c" "blue_a" "78" 
[16,] "c" "blue_b" "54" 
[17,] "c" "green_a" "98" 
[18,] "c" "green_b" "47" 

警告メッセージを受信:

Warning messages: 
1: In if (t2$rank_of_values > 0.8) { : 
    the condition has length > 1 and only the first element will be used 
2: In if (t2$rank_of_values > 0.8) { : 
    the condition has length > 1 and only the first element will be used 
3: In if (t2$rank_of_values > 0.8) { : 
    the condition has length > 1 and only the first element will be used 
4: In if (t2$rank_of_values > 0.8) { : 
    the condition has length > 1 and only the first element will be used 
5: In if (t2$rank_of_values > 0.8) { : 
    the condition has length > 1 and only the first element will be used 
6: In if (t2$rank_of_values > 0.8) { : 
    the condition has length > 1 and only the first element will be used 
7: In if (t2$rank_of_values > 0.8) { : 
    the condition has length > 1 and only the first element will be used 
8: In if (t2$rank_of_values > 0.8) { : 
    the condition has length > 1 and only the first element will be used 
9: In if (t2$rank_of_values > 0.8) { : 
    the condition has length > 1 and only the first element will be used 

回答

0

私はフォーラムを検索した後、私の質問に答えました。

# Build mn0, a three-column character matrix (set_a, labelled set_b, value)
# with two rows per (set_a, set_b) combination found in `a`:
#   * "<set_b>_a" always carries the value with the largest rank_of_values;
#   * "<set_b>_b" repeats that same value when its share exceeds the
#     threshold, otherwise it carries the value with the second-largest share.
# Rows are emitted in the order of unique(set_a), then unique(set_b).
# Expects `a` to be a data.table with columns set_a, set_b, value and
# rank_of_values.
threshold <- 0.8
colu <- unique(as.character(a$set_a))
colu2 <- unique(as.character(a$set_b))

# One slot per combination, filled in loop order and bound once at the end
# instead of growing the matrix with rbind() on every iteration.
pieces <- vector("list", length(colu) * length(colu2))
slot <- 0L

for (grp_a in colu) {
  for (grp_b in colu2) {
    slot <- slot + 1L

    # Rows of this group, largest share first.
    grp <- a[a$set_a %in% grp_a & a$set_b %in% grp_b, ][order(-rank_of_values)]

    # A combination that does not occur in the data contributes no rows.
    if (nrow(grp) == 0L) {
      next
    }

    # Rows are addressed by position (1 = largest share), never by a share
    # value. When the top share dominates the group, row 1 is used for both
    # output rows; otherwise the "_b" row comes from row 2.
    second <- if (grp$rank_of_values[[1L]] > threshold) 1L else 2L

    pieces[[slot]] <- rbind(
      c(
        as.character(grp$set_a[[1L]]),
        paste0(grp$set_b[[1L]], "_a"),
        grp$value[[1L]]
      ),
      c(
        as.character(grp$set_a[[second]]),
        paste0(grp$set_b[[second]], "_b"),
        grp$value[[second]]
      )
    )
  }
}

# Empty slots (NULL) are ignored by rbind(); the result is NULL when no
# combination produced any rows.
mn0 <- do.call(rbind, pieces)
mn0

結果

 [,1] [,2]  [,3] 
[1,] "a" "red_a" "62" 
[2,] "a" "red_b" "19" 
[3,] "a" "blue_a" "94" 
[4,] "a" "blue_b" "81" 
[5,] "a" "green_a" "78" 
[6,] "a" "green_b" "58" 
[7,] "b" "red_a" "61" 
[8,] "b" "red_b" "31" 
[9,] "b" "blue_a" "81" 
[10,] "b" "blue_b" "79" 
[11,] "b" "green_a" "99" 
[12,] "b" "green_b" "74" 
[13,] "c" "red_a" "511" 
[14,] "c" "red_b" "64" 
[15,] "c" "blue_a" "78" 
[16,] "c" "blue_b" "54" 
[17,] "c" "green_a" "98" 
[18,] "c" "green_b" "47" 
関連する問題