-1
Rを使用してしきい値を基にしてテーブルを構築することができないため、この問題は困惑しています。groupby(set_a,set_b
) rank_of_values
が.80より大きいか(同じペーストを同時に利用するか)に基づいてvalue
を選択してテーブルを作成します。私の基準は、最大rank_of_values
を選択し、テーブルにtrueの場合はテーブルにvalue
を返します。その部分が偽である場合は、最初のテーブルと第2テーブルの両方にrank_of_values
= value
を追加します。 set_b
にグループが1つしかないときに私はこれを以前に把握することができましたが、今では複数の問題が発生しています。私も警告を理解しようとしましたが、これまでに長さのエラーはあったけど、これまで運がなかったので困惑しました。どんな助けやアドバイスも感謝します。Rでしきい値を使用してサブセットを作成し、forループを使用して隣接する行を返す
library(data.table)
#create sample data
set_a <- c("a","a","a","a","b","b","b","b","c","c","c","c","a","a","a","a","b","b","b","b","c","c","c","c","a","a","a","a","b","b","b","b","c","c","c","c")
set_b <- c("red","red","red","red","red","red","red","red","red","red","red","red","blue","blue","blue","blue","blue","blue","blue","blue","blue","blue","blue","blue","green","green","green","green","green","green","green","green","green","green","green","green")
#value <- c(sample(1:100,size = 36,replace = T))
value <- c(19,15,3,62,61,17,2,31,16,511,2,64,81,51,58,94,81,79,23,35,9,40,54,78,78,56,11,58,99,74,36,58,5,47,39,98)
a = data.frame(set_a,set_b,value)
a = data.table(a)
#This function calculates the average of the counts
a = a[,rank_of_values:= value/sum(value), by=list(set_a,set_b)]
head(a)
mn <- c() #create matrix to fill
mn0 <- c() #temp matrix
colu = unique(as.character(a$set_a))
colu2 = unique(as.character(a$set_b))
for (i in seq_along(colu))
{
#subset the data table for the set_a:
t = subset(a, set_a == colu[i])
for (i in seq_along(colu2)){
t2 = subset(t, set_b == colu2[i])
#subsetting the data by the set_b
if(t2$rank_of_values > .8){
mn <- cbind(as.character(t2$set_a[i]),paste0(t2$set_b[i],"_a"),t2$value[i])
mn0 <- rbind(mn,mn0)
mn2 <- cbind(as.character(t2$set_a[i]),paste0(t2$set_b[i],"_b"),t2$value[i])
mn0 <- rbind(mn0,mn2)
}
else
{
t2[order(-rank_of_values)][,.SD[1:2]] #create a second data table to select for
#order the data.table and
mn3 <- cbind(as.character(t2$set_a[1]),paste0(t2$set_b[1],"_a"),t2$value[1])
mn0 <- rbind(mn0,mn3)
mn4 <- cbind(as.character(t2$set_a[2]),paste0(t2$set_b[2],"_b"),t2$value[2])
mn0 <- rbind(mn0,mn4)
}
}
}
mn0
現在の結果:
[,1] [,2] [,3]
[1,] "a" "red_a" "19"
[2,] "a" "red_b" "15"
[3,] "a" "blue_a" "81"
[4,] "a" "blue_b" "51"
[5,] "a" "green_a" "78"
[6,] "a" "green_b" "56"
[7,] "b" "red_a" "61"
[8,] "b" "red_b" "17"
[9,] "b" "blue_a" "81"
[10,] "b" "blue_b" "79"
[11,] "b" "green_a" "99"
[12,] "b" "green_b" "74"
[13,] "c" "red_a" "16"
[14,] "c" "red_b" "511"
[15,] "c" "blue_a" "9"
[16,] "c" "blue_b" "40"
[17,] "c" "green_a" "5"
[18,] "c" "green_b" "47"
望ましい結果:
[,1] [,2] [,3]
[1,] "a" "red_a" "62"
[2,] "a" "red_b" "19"
[3,] "a" "blue_a" "94"
[4,] "a" "blue_b" "81"
[5,] "a" "green_a" "78"
[6,] "a" "green_b" "58"
[7,] "b" "red_a" "61"
[8,] "b" "red_b" "31"
[9,] "b" "blue_a" "81"
[10,] "b" "blue_b" "79"
[11,] "b" "green_a" "99"
[12,] "b" "green_b" "74"
[13,] "c" "red_a" "511"
[14,] "c" "red_b" "64"
[15,] "c" "blue_a" "78"
[16,] "c" "blue_b" "54"
[17,] "c" "green_a" "98"
[18,] "c" "green_b" "47"
警告メッセージを受信:
Warning messages:
1: In if (t2$rank_of_values > 0.8) { :
the condition has length > 1 and only the first element will be used
2: In if (t2$rank_of_values > 0.8) { :
the condition has length > 1 and only the first element will be used
3: In if (t2$rank_of_values > 0.8) { :
the condition has length > 1 and only the first element will be used
4: In if (t2$rank_of_values > 0.8) { :
the condition has length > 1 and only the first element will be used
5: In if (t2$rank_of_values > 0.8) { :
the condition has length > 1 and only the first element will be used
6: In if (t2$rank_of_values > 0.8) { :
the condition has length > 1 and only the first element will be used
7: In if (t2$rank_of_values > 0.8) { :
the condition has length > 1 and only the first element will be used
8: In if (t2$rank_of_values > 0.8) { :
the condition has length > 1 and only the first element will be used
9: In if (t2$rank_of_values > 0.8) { :
the condition has length > 1 and only the first element will be used