2017-01-17 14 views
0

私のデータフレーム内の各名前を分割し、それぞれ対応する列に挿入したいと思います(R で、名前を分割して新しい列に挿入する例です)。次のデータフレームを例にとります:

# Label vectors for the example; each one repeats a single code.
# Field labels: 3 + 5 + 4 = 12 values.
N0H0 <- rep("N0H0", times = 3)
N1H1 <- rep("N1H1", times = 5)
N0H05 <- rep("N0H05", times = 4)
# Border labels: 2 + 4 + 3 + 3 = 12 values.
Out20 <- rep("20_Out", times = 2)
Out10 <- rep("10_Out", times = 4)
In5 <- rep("5_In", times = 3)
In15 <- rep("15_In", times = 3)

# Input data frame: two populated columns plus four NA placeholder columns
# (N, H, Number, Out.In) that are to be filled from Field and Border.
df <- data.frame(
  Field = c(N0H0, N1H1, N0H05),
  Border = c(Out20, Out10, In5, In15),
  N = NA,
  H = NA,
  Number = NA,
  Out.In = NA
)

そして、次のような出力を得たいです:

 # Expected result: Field and Border as in the input, with N/H holding the
 # two parts of Field and Number/Out.In holding the two parts of Border.
 output <- data.frame(
   Field = c(N0H0, N1H1, N0H05),
   Border = c(Out20, Out10, In5, In15),
   N = rep(c("N0", "N1", "N0"), times = c(3, 5, 4)),
   H = rep(c("H0", "H1", "H05"), times = c(3, 5, 4)),
   Number = rep(c("20", "10", "5", "15"), times = c(2, 4, 3, 3)),
   Out.In = rep(c("Out", "In"), times = c(6, 6))
 )

答えて

1

列を分割するには、tidyr の extract と separate を使用することができます。
library(dplyr) 
library(tidyr) 
# Split Field into N/H and Border into Number/Out.In, keeping both source
# columns, and return the columns in the same order as `output`.
df %>%
  # Keep only the two source columns: df already carries NA placeholder
  # columns named N, H, Number and Out.In, the names created below.
  select(Field, Border) %>%
  # Capture group 1: leading non-digits plus the first run of digits ("N0").
  # Capture group 2: everything after that ("H0", "H05").
  extract(
    Field,
    into = c("N", "H"),
    regex = "^([^0-9]*\\d+)(.*)",
    remove = FALSE
  ) %>%
  # Split at the underscore ("20_Out" -> "20", "Out"); the delimiter is given
  # explicitly instead of relying on the default non-alphanumeric pattern.
  separate(Border, into = c("Number", "Out.In"), sep = "_", remove = FALSE) %>%
  # select_(.dots = ...) is deprecated; all_of() selects by a character
  # vector of names and errors if any of them is missing.
  select(all_of(names(output)))
# Field Border N H Number Out.In 
#1 N0H0 20_Out N0 H0  20 Out 
#2 N0H0 20_Out N0 H0  20 Out 
#3 N0H0 10_Out N0 H0  10 Out 
#4 N1H1 10_Out N1 H1  10 Out 
#5 N1H1 10_Out N1 H1  10 Out 
#6 N1H1 10_Out N1 H1  10 Out 
#7 N1H1 5_In N1 H1  5  In 
#8 N1H1 5_In N1 H1  5  In 
#9 N0H05 5_In N0 H05  5  In 
#10 N0H05 15_In N0 H05  15  In 
#11 N0H05 15_In N0 H05  15  In 
#12 N0H05 15_In N0 H05  15  In 

または、base R を使う方法もあります。lapply で最初の2列をループし、sub を使って区切り文字を挿入し、read.csv で文字列を読み込み、リストの要素を cbind して、最初の2列以外の列に代入し直します。

# Fill every column after the first two from the pieces of the first two.
# For each of those columns, sub() inserts a comma after the first run of
# digits (consuming any underscores that follow it), read.csv() parses the
# resulting two-field lines, and cbind() places both results side by side.
df[-(1:2)] <- do.call(
  cbind,
  lapply(df[1:2], function(x) {
    read.csv(
      text = sub("(\\d+)_*", "\\1,", x),
      header = FALSE,
      # Read every piece as text. Without this, read.csv() type-converts the
      # numeric part of Border to integer, which would drop leading zeros.
      colClasses = "character",
      stringsAsFactors = FALSE
    )
  })
)
df
# Field Border N H Number Out.In 
#1 N0H0 20_Out N0 H0  20 Out 
#2 N0H0 20_Out N0 H0  20 Out 
#3 N0H0 10_Out N0 H0  10 Out 
#4 N1H1 10_Out N1 H1  10 Out 
#5 N1H1 10_Out N1 H1  10 Out 
#6 N1H1 10_Out N1 H1  10 Out 
#7 N1H1 5_In N1 H1  5  In 
#8 N1H1 5_In N1 H1  5  In 
#9 N0H05 5_In N0 H05  5  In 
#10 N0H05 15_In N0 H05  15  In 
#11 N0H05 15_In N0 H05  15  In 
#12 N0H05 15_In N0 H05  15  In 
1

次のように、該当する列に代入する方法も試してみることができます:

# Split both source columns first, then write the pieces into a copy of df.
border_parts <- strsplit(as.character(df$Border), "_")
# Splitting on "H" consumes the letter; it is put back below.
field_parts <- strsplit(as.character(df$Field), "H")

output <- df
# One row per input value, one column per piece. The target columns are
# positions 5:6 and 3:4 of df, addressed here by name.
output[, c("Number", "Out.In")] <- do.call(rbind, border_parts)
output[, c("N", "H")] <- do.call(rbind, field_parts)
# Restore the "H" prefix removed by the split.
output$H <- paste0("H", output$H)
output
    Field Border N H Number Out.In 
1 N0H0 20_Out N0 H0  20 Out 
2 N0H0 20_Out N0 H0  20 Out 
3 N0H0 10_Out N0 H0  10 Out 
4 N1H1 10_Out N1 H1  10 Out 
5 N1H1 10_Out N1 H1  10 Out 
6 N1H1 10_Out N1 H1  10 Out 
7 N1H1 5_In N1 H1  5  In 
8 N1H1 5_In N1 H1  5  In 
9 N0H05 5_In N0 H05  5  In 
10 N0H05 15_In N0 H05  15  In 
11 N0H05 15_In N0 H05  15  In 
12 N0H05 15_In N0 H05  15  In 
関連する問題