2016-04-30 4 views
0

私の質問はRでコード化されていますので、Rでコード化されたヘルプ/ソリューションは非常に高く評価されます。各行について、最も高い値を持つ上位5列の列名を出力しますが、0は除外します。

私は次の行列があります。

# Example input: a 5 x 100 numeric matrix (despite the name `df`, structure()
# with .Dim/.Dimnames and no class attribute builds a plain matrix, not a
# data frame). Row names are "4", "5", "8", "9", "14"; column names are
# "X0" through "X99". Values are filled column by column; most are 0.
df <- structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.018, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.02, 0, 0, 0.034, 0, 0.008, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0.002, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0.002, 0, 0.07, 0, 0, 0, 0, 0.006, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.006, 0, 0, 0.004, 0, 
0.002, 0, 0, 0.006, 0, 0, 0, 0, 0.002, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0.006, 0, 0, 0.008, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0.004, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0.002, 0, 0.054, 0, 0, 0, 0, 0.004, 0, 0, 0.028, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0.034, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.002, 0, 0, 
0.012, 0, 0, 0, 0, 0, 0, 0.002, 0, 0, 0, 0, 0.008, 0, 0, 0.01, 
0, 0.008, 0, 0, 0, 0, 0.006, 0, 0, 0.018, 0, 0.002, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0.018, 0, 0.196, 0.012, 0, 0, 0, 0, 0, 0, 0.094, 
0, 0, 0.002, 0, 0, 0, 0, 0, 0, 0.014, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0.034, 0, 0, 0, 0, 0.006, 0, 0, 0.004, 0, 0.132, 
0, 0.002, 0.396, 0, 0.002, 0, 0, 0, 0, 0.012, 0, 0, 0.004, 0, 
0.082, 0, 0, 0.038, 0, 0.014, 0, 0, 0, 0.006, 0.1, 0.006, 0.616, 
0.174, 0.12, 0, 0.12, 0.036, 0, 0, 0, 0, 0, 0, 0, 0.002, 0, 0, 
0, 0, 0.002, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0.008, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.002, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.002, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.004, 0, 0, 0.036, 0, 0.042, 0, 
0.002, 0.058, 0, 0, 0, 0, 0.002, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0.002, 0, 0, 0, 0.874, 0.058, 0.874, 0.148, 0.1, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0.002, 0, 0, 0, 0, 0, 0, 0.06, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0.03, 0, 0, 0.006, 0, 0.022, 0, 0, 
0.036), .Dim = c(5L, 100L), .Dimnames = list(c("4", "5", "8", 
"9", "14"), c("X0", "X1", "X2", "X3", "X4", "X5", "X6", "X7", 
"X8", "X9", "X10", "X11", "X12", "X13", "X14", "X15", "X16", 
"X17", "X18", "X19", "X20", "X21", "X22", "X23", "X24", "X25", 
"X26", "X27", "X28", "X29", "X30", "X31", "X32", "X33", "X34", 
"X35", "X36", "X37", "X38", "X39", "X40", "X41", "X42", "X43", 
"X44", "X45", "X46", "X47", "X48", "X49", "X50", "X51", "X52", 
"X53", "X54", "X55", "X56", "X57", "X58", "X59", "X60", "X61", 
"X62", "X63", "X64", "X65", "X66", "X67", "X68", "X69", "X70", 
"X71", "X72", "X73", "X74", "X75", "X76", "X77", "X78", "X79", 
"X80", "X81", "X82", "X83", "X84", "X85", "X86", "X87", "X88", 
"X89", "X90", "X91", "X92", "X93", "X94", "X95", "X96", "X97", 
"X98", "X99"))) 

各行について、値が上位5つに入る列の列名を出力したいのですが、値が0の列は除外したいです。

# Return the labels of the five largest entries of `x`.
#
# x:     numeric vector of values to rank.
# names: labels aligned position-by-position with `x`.
#
# The result always has length 5, ordered from largest to smallest value.
# Zero entries are NOT filtered out, so they appear in the result whenever
# fewer than five larger values exist. If `x` has fewer than five elements,
# the missing positions are NA.
myfunction <- function(x, names) {
  ranked <- order(x, decreasing = TRUE)
  top_five <- ranked[seq_len(5L)]
  names[top_five]
}

# Run myfunction on every row of `df`. apply() places each row's five names
# in a column of its result, so the result is transposed to get one output
# row per row of `df`.
col <- colnames(df)
output <- t(apply(df, MARGIN = 1, FUN = myfunction, names = col))

しかし、出力を見るとわかるように、結果にはX0、X1...などの列名(値が0の列)が含まれてしまいます。

現在のコードでは、次に大きい値が0である場合でも、その値が0の列の列名を返してしまいます。ゼロ以外の値に対してのみ列名を抽出するようにコードを修正するにはどうしたらいいですか?

ご連絡いただきありがとうございます。

EDIT:

現在の output の出力は次のとおりです:

[,1] [,2] [,3] [,4] [,5] 
4 "X91" "X65" "X64" "X0" "X1" 
5 "X59" "X64" "X48" "X62" "X12" 
8 "X91" "X65" "X64" "X0" "X1" 
9 "X64" "X46" "X91" "X65" "X59" 
14 "X59" "X64" "X91" "X82" "X62" 

次のような出力が欲しいです:

[,1] [,2] [,3] [,4] [,5] 
4 "X91" "X65" "X64" "" "" 
5 "X59" "X64" "X48" "X62" "X12" 
8 "X91" "X65" "X64" "" "" 
9 "X64" "X46" "X91" "X65" "X59" 
14 "X59" "X64" "X91" "X82" "X62" 

行4と行8では、ゼロでない値が上位3つしかないためです。

ありがとうございます!

答えて

1

'df'の行をループし、0でない値のインデックス('i1')を作成します。'i1'に基づいて'x'をサブセット化し、降順にorderして数値インデックス('i2')を取得し、headで最初の5つの要素を抽出します。一部の行は非ゼロ要素が5つ未満であるため、出力はlistになります。次にlistをループし、各要素のlengthにlist要素の最大lengthを割り当てることで末尾にNAを埋めて長さを揃え、最後にlistの要素をrbindします。

# For every row of the matrix `df`, collect the names of up to five columns
# holding the largest non-zero values, ordered from largest to smallest.
#
# lapply() over row indices is used instead of apply(): apply() simplifies
# its result to a matrix (or plain vector) whenever every row yields the same
# number of names, e.g. when all rows have at least five non-zero values.
# The padding step below needs a list, so the list is built explicitly.
lst <- lapply(seq_len(nrow(df)), function(r) {
  x <- df[r, ]
  i1 <- x != 0                      # keep only the non-zero entries
  i2 <- order(-x[i1])               # rank the kept entries, largest first
  head(colnames(df)[i1][i2], 5)
})
names(lst) <- rownames(df)          # carried over as row names by rbind below

# Pad every element with NA up to the longest element's length so that the
# elements can be row-bound into a character matrix.
res <- do.call(rbind, lapply(lst, `length<-`, max(lengths(lst))))

# Show empty strings instead of NA for the padded cells.
replace(res, is.na(res), "")
# [,1] [,2] [,3] [,4] [,5] 
#4 "X91" "X65" "X64" "" "" 
#5 "X59" "X64" "X48" "X62" "X12" 
#8 "X91" "X65" "X64" "" "" 
#9 "X64" "X46" "X91" "X65" "X59" 
#14 "X59" "X64" "X91" "X82" "X62" 
必要に応じて、replaceでNAを""に置き換えます。