2017-03-08 16 views
1

以前の類似の質問に対する @Axeman さんの回答(正解)に関連して質問します。下の dput は、私のデータフレーム(df)と期待する出力(output)です。列名に基づいて値を貼り付けます

2組の変数(同じ長さN)から文字列のpasteを実行しようとしています。

var1.x と var1.y の文字列を貼り合わせて var1 に、var2.x と var2.y の文字列を貼り合わせて var2 にします。同様に varN.x, varN.y から varN を作れる、任意の N に対応したコードを探しています。

@Axeman さんの回答を応用してみましたが、N 個の変数を貼り付ける場合にはスケールしません(変数の数が増えると対応できません)。

# Attempt adapted from the earlier answer: paste columns 3:4 (the .x columns)
# with columns 5:6 (the .y columns), dropping NA parts via na.omit().
# The column positions are hard-coded, so every extra varN pair would need the
# index ranges edited by hand.
# NOTE(review): as.character() is applied to each two-column data-frame slice
# as a whole rather than column by column, so mapply() presumably does not
# receive the per-column vectors this code expects - confirm in your R version.
df[,3:4] <- mapply(function(x, y) paste0(na.omit(c(x, y)), collapse = ''), 
        as.character(df[,3:4]), as.character(df[,5:6])) 
# Keep the two id columns plus the two overwritten columns as the result.
output <- df[1:4] 

# Input data: two id columns (factor1, factor2) followed by the value columns
# var1.x, var2.x, var1.y, var2.y. Value columns are factors and contain NAs.
# Levels are listed explicitly so the factor codes do not depend on locale.
df <- data.frame(
  factor1 = factor(
    c("f1", "f1", "f2", "f1", "f1", "f2", "f2", "f1"),
    levels = c("f1", "f2")
  ),
  factor2 = c(1L, 2L, 1L, 3L, 4L, 2L, 3L, 5L),
  var1.x = factor(
    c("a", "d", NA, "g", "h", "t", "y", "j"),
    levels = c("a", "d", "g", "h", "j", "t", "y")
  ),
  var2.x = factor(
    c(NA, "g", "s", NA, "g", "s", "s", "s"),
    levels = c("g", "s")
  ),
  var1.y = factor(
    c("x", "f", NA, "g", "g", "g", NA, "h"),
    levels = c("f", "g", "h", "x")
  ),
  var2.y = factor(
    c("t", "g", "g", "a", NA, "h", "h", "h"),
    levels = c("a", "g", "h", "t")
  )
)

base R で実現したいと考えています。期待する出力は次のとおりです。
# Expected result: for each N, varN.x and varN.y pasted together into varN.
# A missing part is skipped; a row where both parts are missing stays NA.
# Row 8 is var1 = "jh" (j + h) and var2 = "sh" (s + h), following the same
# varN.x + varN.y rule as every other row.
output <- structure(list(factor1 = structure(c(1L, 1L, 2L, 1L, 1L, 2L,
2L, 1L), .Label = c("f1", "f2"), class = "factor"), factor2 = c(1L,
2L, 1L, 3L, 4L, 2L, 3L, 5L), var1 = structure(c(1L, 2L, NA, 3L,
4L, 6L, 7L, 5L), .Label = c("ax", "df", "gg", "hg", "jh", "tg",
"y"), class = "factor"), var2 = structure(c(6L, 3L, 4L, 1L, 2L,
5L, 5L, 5L), .Label = c("a", "g", "gg", "sg", "sh", "t"
), class = "factor")), .Names = c("factor1", "factor2", "var1",
"var2"), class = "data.frame", row.names = c(NA, -8L))

答えて

1

base R を使った方法の一つです。

# Paste together every group of columns that share a name prefix
# (varN.x, varN.y, ... -> varN), for any number of prefixes.
# Assumes the first two columns of `df` are id columns that are kept as-is and
# that every remaining column is named "<prefix>.<suffix>".
id_cols <- 1:2

# Work on plain character vectors; factor codes must not be pasted.
vals <- lapply(df[-id_cols], as.character)

# Replace NA with "" so paste0() does not emit the literal text "NA".
# Only the value columns are touched; the id columns are left alone.
vals <- lapply(vals, function(v) replace(v, is.na(v), ""))

# Prefix of each value column: everything before the first dot.
prefix <- sub("\\..*", "", names(vals))

# Build the result one prefix at a time. Columns are selected by exact prefix
# equality: a pattern match such as grepl("var1", ...) would also pick up
# var10.x, var11.x, ... once there are ten or more variables.
res <- df[id_cols]
for (p in unique(prefix)) {
  # unname() so the column names are not passed to paste0() as argument names.
  joined <- do.call(paste0, unname(vals[prefix == p]))
  # A row where every part was missing becomes NA again.
  joined[joined == ""] <- NA_character_
  # Name the new column after the real prefix (var1, var2, ...).
  res[[p]] <- joined
}
df <- res

df
#   factor1 factor2 var1 var2
# 1      f1       1   ax    t
# 2      f1       2   df   gg
# 3      f2       1 <NA>   sg
# 4      f1       3   gg    a
# 5      f1       4   hg    g
# 6      f2       2   tg   sh
# 7      f2       3    y   sh
# 8      f1       5   jh   sh
関連する問題