2017-12-18 4 views
1

正常に動作している関数があるのですが、そこにカラムの1つを削除し、別のカラムのNA値を1に置き換える次のコマンドを追加しました(関数内でこのカラム操作を行うと、データフレームが空または予期しない値に変わってしまいます):

# Remove the PlateIden column from the data frame.
df$PlateIden <- NULL 

    # Replace every missing (NA) plate number with 1.
    df$PlateNum[is.na(df$PlateNum)] <- 1 

これらのコマンドは、関数の外でデータフレームに対して直接実行すると正しく動作します。しかし、どちらか一方でも関数内で使うと、データフレームが「空」または「1」という値に変わってしまいます。

以下が関数の全体です:

#' Derive run metadata from a one-column data frame of file names.
#'
#' Renames the single input column to `FileName`, extracts the leading run of
#' digits as `RunDate` (parsed with `ymd()`), extracts the single digit that
#' follows "Plate"/"plate" as `PlateNum`, drops rows that repeat a
#' User/Project/RunDate/PlateNum combination, numbers each distinct
#' User/Project/RunDate combination in `ID`, and fills missing plate numbers
#' with 1.
#'
#' @param df Data frame whose only column holds the file names.
#' @param addproject Not used by the body: `Project` is filled with the
#'   literal string "addproject".
#' @param adduser Not used by the body: `User` is filled with the literal
#'   string "adduser".
#' @return The cleaned data frame with the columns FileName, RunDate,
#'   PlateNum, User, Project and ID.
cleanup_safe <- function(df, addproject, adduser) {
  colnames(df) <- "FileName"

  # Column 2 of the str_match() result is the first capture group.
  df$RunDate <- str_match(df$FileName, "^[a-zA-Z ]*(\\d+)")[, 2]
  df$RunDate <- ymd(df$RunDate)

  df$PlateNum <- str_match(df$FileName, "(?<=Plate|plate)[_ ]?(\\d)")[, 2]
  df$PlateIden <- str_match(
    df$FileName,
    "(?<=Plate|plate)[_ ]?\\d*[_ ]?([a-zA-Z])"
  )[, 2]

  # NOTE(review): these are string literals, not the adduser/addproject
  # arguments; kept as-is so existing calls keep producing the same output.
  df$User <- "adduser"
  df$Project <- "addproject"

  # Keep only the first row of each User/Project/RunDate/PlateNum combination.
  plate_key <- c("User", "Project", "RunDate", "PlateNum")
  df <- df[!duplicated(df[, plate_key]), ]

  # Running count of distinct User/Project/RunDate combinations.
  run_key <- c("User", "Project", "RunDate")
  df$ID <- cumsum(!duplicated(df[run_key]))

  df$PlateIden <- NULL
  df$PlateNum[is.na(df$PlateNum)] <- 1

  # Return the data frame explicitly. A replacement assignment as the last
  # expression would make the function return the assigned value (1), not df.
  df
}

テストデータセット

# Sample file names: each starts with a yyyymmdd run date and contains a
# "Plate <number>[letter]" token.
test <- c("20160801, Optimization, gp70_B.CaseA_V1_V2 Coupling Testing, Plate 1a, IgG-Biot, MAF.srbx", 
         "20160801, Optimization, gp70_B.CaseA_V1_V2 Coupling Testing, Plate 1a, IgG-Biot, SAF.srbx", 
         "20160801, Optimization, gp70_B.CaseA_V1_V2 Coupling Testing, Plate 1a, IgG-Biot.srbx", 
         "20160801, Optimization, gp70_B.CaseA_V1_V2 Coupling Testing, Plate 1b, IgG-Biot, MAF.srbx", 
         "20160801, Optimization, gp70_B.CaseA_V1_V2 Coupling Testing, Plate 1b, IgG-Biot, SAF.srbx", 
         "20160801, Optimization, gp70_B.CaseA_V1_V2 Coupling Testing, Plate 1b, IgG-Biot.srbx", 
         "20160801, Optimization, gp70_B.CaseA_V1_V2 Coupling Testing, Plate 2, IgG-Biot, MAF.srbx", 
         "20160801, Optimization, gp70_B.CaseA_V1_V2 Coupling Testing, Plate 2, IgG-Biot, SAF.srbx", 
         "20160801, Optimization, gp70_B.CaseA_V1_V2 Coupling Testing, Plate 2, IgG-Biot.srbx", 
         "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 1a, IgG-Biot, MAF.srbx", 
         "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 1a, IgG-Biot.srbx", 
         "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 1b, IgG-Biot, MAF.srbx", 
         "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 1b, IgG-Biot, SAF.srbx", 
         "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 1b, IgG-Biot.srbx", 
         "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 2a, IgG-Biot, MAF.srbx", 
         "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 2a, IgG-Biot, SAF.srbx", 
         "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 2a, IgG-Biot.srbx", 
         "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 2b, IgG-Biot, MAF.srbx", 
         "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 2b, IgG-Biot, SAF.srbx", 
         "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 2b, IgG-Biot.srbx", 
         "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 3a, IgG-Biot, MAF.srbx", 
         "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 3a, IgG-Biot, SAF.srbx", 
         "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 3a, IgG-Biot.srbx", 
         "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 3b, IgG-Biot, MAF.srbx", 
         "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 3b, IgG-Biot, SAF.srbx", 
         "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 3b, IgG-Biot.srbx", 
         "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 4a, IgG-Biot, MAF.srbx", 
         "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 4a, IgG-Biot, SAF.srbx", 
         "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 4a, IgG-Biot.srbx", 
         "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 4b, IgG-Biot, MAF.srbx", 
         "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 4b, IgG-Biot, SAF.srbx", 
         "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 4b, IgG-Biot.srbx", 
         "20160812, Optimization, Testing New lot of NGS, Plate 1, IgG-Biot, MAF.srbx", 
         "20160812, Optimization, Testing New lot of NGS, Plate 1, IgG-Biot, SAF.srbx", 
         "20160812, Optimization, Testing New lot of NGS, Plate 1, IgG-Biot.srbx", 
         "20160812, Optimization, Testing New lot of NGS, Plate 2, IgG-Biot, MAF.srbx", 
         "20160812, Optimization, Testing New lot of NGS, Plate 2, IgG-Biot, SAF.srbx", 
         "20160812, Optimization, Testing New lot of NGS, Plate 2, IgG-Biot.srbx", 
         "20160812_a, Optimization, Testing New lot of NGS, Plate 1, IgG-Biot, MAF.srbx", 
         "20160812_a, Optimization, Testing New lot of NGS, Plate 1, IgG-Biot, SAF.srbx", 
         "20160812_a, Optimization, Testing New lot of NGS, Plate 1, IgG-Biot.srbx", 
         "20160812_a, Optimization, Testing New lot of NGS, Plate 2, IgG-Biot, MAF.srbx", 
         "20160812_a, Optimization, Testing New lot of NGS, Plate 2, IgG-Biot, SAF.srbx", 
         "20160812_a, Optimization, Testing New lot of NGS, Plate 2, IgG-Biot.srbx") 

# Wrap the file names in a one-column data frame; keep them as character
# strings rather than factors regardless of the R version's default.
dataframe <- as.data.frame(test, stringsAsFactors = FALSE)

# Pass the project and user labels as strings: the bare names testproject and
# testuser are not defined in the code shown and would fail as soon as the
# function evaluated them.
dataframe <- cleanup_safe(dataframe, "testproject", "testuser")
+0

あなたは私たちとデータを共有することはできますか? – suchait

+0

テストデータセットを追加しました – AwesomeeExpress

+0

あなたのテストデータは1つのカラムを持つ 'data.frame'ですか?あれは正しいですか?サンプルデータを共有したい場合は、 'dput'を使うのがベストです。 –

答えて

1

`df$PlateNum[is.na(df$PlateNum)] <- 1` を、次の data.table のコードに置き換えてください:

# Convert df to a data.table by reference, then fill missing plate numbers
# in place.
setDT(df)
    df[is.na(PlateNum), PlateNum := 1]

上のコードは、data.table を使って `df$PlateNum[is.na(df$PlateNum)] <- 1` を置き換えたものです。したがって、スクリプト全体は次のようになります:

library(data.table)
library(lubridate)
library(stringr)

# Sample file names: each starts with a yyyymmdd run date and contains a
# "Plate <number>[letter]" token.
test <- c("20160801, Optimization, gp70_B.CaseA_V1_V2 Coupling Testing, Plate 1a, IgG-Biot, MAF.srbx", 
      "20160801, Optimization, gp70_B.CaseA_V1_V2 Coupling Testing, Plate 1a, IgG-Biot, SAF.srbx", 
      "20160801, Optimization, gp70_B.CaseA_V1_V2 Coupling Testing, Plate 1a, IgG-Biot.srbx", 
      "20160801, Optimization, gp70_B.CaseA_V1_V2 Coupling Testing, Plate 1b, IgG-Biot, MAF.srbx", 
      "20160801, Optimization, gp70_B.CaseA_V1_V2 Coupling Testing, Plate 1b, IgG-Biot, SAF.srbx", 
      "20160801, Optimization, gp70_B.CaseA_V1_V2 Coupling Testing, Plate 1b, IgG-Biot.srbx", 
      "20160801, Optimization, gp70_B.CaseA_V1_V2 Coupling Testing, Plate 2, IgG-Biot, MAF.srbx", 
      "20160801, Optimization, gp70_B.CaseA_V1_V2 Coupling Testing, Plate 2, IgG-Biot, SAF.srbx", 
      "20160801, Optimization, gp70_B.CaseA_V1_V2 Coupling Testing, Plate 2, IgG-Biot.srbx", 
      "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 1a, IgG-Biot, MAF.srbx", 
      "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 1a, IgG-Biot.srbx", 
      "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 1b, IgG-Biot, MAF.srbx", 
      "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 1b, IgG-Biot, SAF.srbx", 
      "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 1b, IgG-Biot.srbx", 
      "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 2a, IgG-Biot, MAF.srbx", 
      "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 2a, IgG-Biot, SAF.srbx", 
      "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 2a, IgG-Biot.srbx", 
      "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 2b, IgG-Biot, MAF.srbx", 
      "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 2b, IgG-Biot, SAF.srbx", 
      "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 2b, IgG-Biot.srbx", 
      "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 3a, IgG-Biot, MAF.srbx", 
      "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 3a, IgG-Biot, SAF.srbx", 
      "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 3a, IgG-Biot.srbx", 
      "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 3b, IgG-Biot, MAF.srbx", 
      "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 3b, IgG-Biot, SAF.srbx", 
      "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 3b, IgG-Biot.srbx", 
      "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 4a, IgG-Biot, MAF.srbx", 
      "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 4a, IgG-Biot, SAF.srbx", 
      "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 4a, IgG-Biot.srbx", 
      "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 4b, IgG-Biot, MAF.srbx", 
      "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 4b, IgG-Biot, SAF.srbx", 
      "20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 4b, IgG-Biot.srbx", 
      "20160812, Optimization, Testing New lot of NGS, Plate 1, IgG-Biot, MAF.srbx", 
      "20160812, Optimization, Testing New lot of NGS, Plate 1, IgG-Biot, SAF.srbx", 
      "20160812, Optimization, Testing New lot of NGS, Plate 1, IgG-Biot.srbx", 
      "20160812, Optimization, Testing New lot of NGS, Plate 2, IgG-Biot, MAF.srbx", 
      "20160812, Optimization, Testing New lot of NGS, Plate 2, IgG-Biot, SAF.srbx", 
      "20160812, Optimization, Testing New lot of NGS, Plate 2, IgG-Biot.srbx", 
      "20160812_a, Optimization, Testing New lot of NGS, Plate 1, IgG-Biot, MAF.srbx", 
      "20160812_a, Optimization, Testing New lot of NGS, Plate 1, IgG-Biot, SAF.srbx", 
      "20160812_a, Optimization, Testing New lot of NGS, Plate 1, IgG-Biot.srbx", 
      "20160812_a, Optimization, Testing New lot of NGS, Plate 2, IgG-Biot, MAF.srbx", 
      "20160812_a, Optimization, Testing New lot of NGS, Plate 2, IgG-Biot, SAF.srbx", 
      "20160812_a, Optimization, Testing New lot of NGS, Plate 2, IgG-Biot.srbx") 

# Wrap the file names in a one-column data frame; keep them as character
# strings rather than factors regardless of the R version's default.
dataframe <- as.data.frame(test, stringsAsFactors = FALSE)


    #' Derive run metadata from a one-column data frame of file names.
    #'
    #' Renames the single input column to `FileName`, extracts the leading run
    #' of digits as `RunDate` (parsed with `ymd()`), extracts the single digit
    #' that follows "Plate"/"plate" as `PlateNum`, drops rows that repeat a
    #' User/Project/RunDate/PlateNum combination, numbers each distinct
    #' User/Project/RunDate combination in `ID`, and fills missing plate
    #' numbers with "1".
    #'
    #' @param df Data frame whose only column holds the file names.
    #' @param addproject Not used by the body: `Project` is filled with the
    #'   literal string "addproject".
    #' @param adduser Not used by the body: `User` is filled with the literal
    #'   string "adduser".
    #' @return A data.table with the columns FileName, RunDate, PlateNum,
    #'   User, Project and ID.
    cleanup_safe <- function(df, addproject, adduser) {
      colnames(df) <- "FileName"

      # Column 2 of the str_match() result is the first capture group.
      df$RunDate <- str_match(df$FileName, "^[a-zA-Z ]*(\\d+)")[, 2]
      df$RunDate <- ymd(df$RunDate)

      df$PlateNum <- str_match(df$FileName, "(?<=Plate|plate)[_ ]?(\\d)")[, 2]
      df$PlateIden <- str_match(
        df$FileName,
        "(?<=Plate|plate)[_ ]?\\d*[_ ]?([a-zA-Z])"
      )[, 2]

      # NOTE(review): these are string literals, not the adduser/addproject
      # arguments; kept as-is so existing calls keep producing the same output.
      df$User <- "adduser"
      df$Project <- "addproject"

      # Keep only the first row of each User/Project/RunDate/PlateNum
      # combination.
      plate_key <- c("User", "Project", "RunDate", "PlateNum")
      df <- df[!duplicated(df[, plate_key]), ]

      # Running count of distinct User/Project/RunDate combinations.
      run_key <- c("User", "Project", "RunDate")
      df$ID <- cumsum(!duplicated(df[run_key]))

      df <- df[, !(names(df) %in% "PlateIden"), drop = FALSE]

      setDT(df)
      # PlateNum holds the character output of str_match(), so fill with a
      # string instead of relying on implicit numeric-to-character coercion.
      df[is.na(PlateNum), PlateNum := "1"]

      # Return the table explicitly; the trailing [] makes the result print
      # normally after the by-reference := update.
      df[]
    }

# Pass the project and user labels as strings: the bare names testproject and
# testuser are not defined in the code shown and would fail as soon as the
# function evaluated them.
dataframe <- cleanup_safe(dataframe, "testproject", "testuser")

これを実行すると、次の出力が得られます:

FileName RunDate PlateNum User Project ID 
1: 20160801, Optimization, gp70_B.CaseA_V1_V2 Coupling Testing, Plate 1a, IgG-Biot, MAF.srbx 2016-08-01  1 adduser addproject 1 
2: 20160801, Optimization, gp70_B.CaseA_V1_V2 Coupling Testing, Plate 2, IgG-Biot, MAF.srbx 2016-08-01  2 adduser addproject 1 
3: 20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 1a, IgG-Biot, MAF.srbx 2016-08-02  1 adduser addproject 2 
4: 20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 2a, IgG-Biot, MAF.srbx 2016-08-02  2 adduser addproject 2 
5: 20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 3a, IgG-Biot, MAF.srbx 2016-08-02  3 adduser addproject 2 
6: 20160802, Optimization, New lot of gp70_B.CaseA_V1_V2 Testing, Plate 4a, IgG-Biot, MAF.srbx 2016-08-02  4 adduser addproject 2 
7:     20160812, Optimization, Testing New lot of NGS, Plate 1, IgG-Biot, MAF.srbx 2016-08-12  1 adduser addproject 3 
8:     20160812, Optimization, Testing New lot of NGS, Plate 2, IgG-Biot, MAF.srbx 2016-08-12  2 adduser addproject 3 
+1

ありがとうございました!元のコードがなぜ動かなかったのかは分かりませんが、参考になりました。 – AwesomeeExpress

+0

どういたしまして。指摘してくれてありがとう。確かに奇妙でした。コードの最後の行が関数の中でなぜ動作しないのか、私にも分かりません。 – suchait

関連する問題