2017-04-07 7 views
0

同じデータフレーム内で、ある列に入っている文字列が別の列の中に現れた場合に、それを検出して置換する方法を探しています。今回のケースでは、スクレイピングしたフォーラムの投稿があり、投稿の中で他のユーザーが名前で参照されています。これらの名前を取り除かないと高頻度語としてカウントされてしまうため、分析の前に削除したいと考えています。以下は、このデータフレームのdput出力です(質問タイトル:置換のために、列の複数行を組み合わせたパターンにマッチさせるには?):

# Reproducible copy of the question's data (dput() output), assigned to `dt`
# because the answer further down refers to the table by that name.
# The `.internal.selfref = <pointer: ...>` attribute that dput() emits for a
# data.table has been dropped: a printed pointer is not parseable R, so the
# original snippet could not be run as written.
# NOTE(review): the object keeps the "data.table" class but has no
# self-reference; calling data.table::setDT(dt) before using `:=` is the
# documented way to restore it -- confirm against your data.table version.
dt <- structure(list(uber_name = structure(c(9L, 2L, 1L, 2L, 3L, 10L,
3L, 9L, 11L), .Label = c("aluber1968", "bigdreamslittlemoney",
"FuberNYC", "JamesM", "jonnyplastic", "JustDre", "KING D", "klimarov",
"NycGirl705", "shumacker", "spike69", "theitalian", "Uberman8263",
"Ez2dj", "Manhmptn", "NYCDriver", "staytune", "UBS", "Ubured",
"Jme10", "Lennyyellowcab", "Mir", "eagle88", "Ibuys4730", "NoUsername",
"BathoTrask", "Douglas", "LGC", "Jakeinny098", "Rustyshackelford",
"shabbyroch", "ubershiza", "drbrkln", "elys123", "bossdriver",
"HerbyHerb", "Jim1985", "Malik38", "STIDRIVER", "vxlon7", "Waqar",
"tohunt4me", "DogPound", "SuliB", "AlBrklyn", "John Cunningham",
"MReeves", "PinkFoot", "alextheboss", "luisannalui", "censoredbytheFCC",
"KONY", "cieru", "Jorlev", "Smooth954", "marcusguber", "nyc321",
"Tony from New Jersey", "Vanstaal", "Bkrah", "brunoamat2", "gebbels6",
"Kevin7889", "uanic", "Uber OG", "UberKilledMyMarriage", "ya mon its me",
"HunkAWestchester", "Mr Affinito", "ninja warrior", "NoNonsense",
"notacabdriver", "Notauberhater", "TwoFiddyMile", "bilyvh", "cybertec69",
"JohnnyBlanco", "SOBE", "ubernyc"), class = "factor"), uber_write = c("I see people post about getting a w",
"you have 2 choices either you drive", "More than a year ago I didnt drive ",
"yeah i stopped driving for them for", "Ive been getting some promotions la",
"FuberNYC saidIve been getting some ", "shumacker saidAnd You feel importan",
"FuberNYC saidIve been getting some ", "They start coming after few months "
), uber_date = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("Jan 19, 2017", "Mar 30, 2017", "Jan 23, 2017",
"Jan 12, 2017", "Jan 9, 2017", "Jan 1, 2017", "Dec 31, 2016",
"Nov 26, 2016", "Nov 3, 2016", "Dec 22, 2016", "Dec 13, 2016",
"Dec 2, 2016", "Nov 15, 2016", "Oct 31, 2016", "Oct 20, 2016",
"Mar 14, 2017", "Sep 1, 2016", "Jul 26, 2016", "Mar 1, 2017",
"Feb 25, 2017", "Sep 8, 2016", "Sep 9, 2016", "Apr 21, 2015"), class = "factor")), .Names = c("uber_name",
"uber_write", "uber_date"), class = c("data.table", "data.frame"
), row.names = c(NA, -9L))

以前にgsubを使ったことはありますが、今回のケースにどう適用すればよいのか分かりません。"uber_name"列にある名前を取り出し、それらのユーザーによる投稿である"uber_write"列のすべての行から削除したいのです。

data.table(dt)に含まれるすべてのユーザー名からベクトル uber_names を作成し、そこから (name1|name2|name3) という形式の正規表現を生成すれば、一致するすべてのユーザー名を "" に置き換えることができます。

答えて

0

library(data.table)

# Strip every user name that appears in dt$uber_name from the post text in
# dt$uber_write, storing the result in a new column `uber_write_filtered`.
# All names are combined into a single alternation, (name1|name2|...), so
# one vectorised gsub() pass handles every row.

# as.character(): uber_name is a factor in the question's data.
# unique() plus the NA / "" filter keeps paste() from injecting a literal
# "NA" or an empty alternative into the pattern.
uber_names <- unique(as.character(dt$uber_name))
uber_names <- uber_names[!is.na(uber_names) & nzchar(uber_names)]

# Longest names first, so a name that is a prefix of another one cannot be
# preferred by the regex engine and leave a partial name behind.
uber_names <- uber_names[order(nchar(uber_names), decreasing = TRUE)]

# Backslash-escape regex metacharacters so each name is matched literally.
escaped_names <- gsub(
  "([\\\\.^$|?*+()\\[\\]{}])", "\\\\\\1", uber_names,
  perl = TRUE
)

dt[, uber_write_filtered := gsub(
  pattern = paste0("(", paste(escaped_names, collapse = "|"), ")"),
  replacement = "", uber_write)]
+0

これでうまくいきました。ありがとうございます! – LoF10

0

あなたのデータフレームをそのまま再現することはできませんでしたが、それに近いものを用意しました:

# Stand-in data set for the answer: 79 forum user names, each paired with
# one of the question's nine truncated posts. The posts repeat in the same
# order, so they are recycled with rep_len() to fill all 79 rows.
data <- data.frame(
  uber_name = c(
    "aluber1968", "bigdreamslittlemoney", "FuberNYC", "JamesM",
    "jonnyplastic", "JustDre", "KING D", "klimarov", "NycGirl705",
    "shumacker", "spike69", "theitalian", "Uberman8263", "Ez2dj",
    "Manhmptn", "NYCDriver", "staytune", "UBS", "Ubured", "Jme10",
    "Lennyyellowcab", "Mir", "eagle88", "Ibuys4730", "NoUsername",
    "BathoTrask", "Douglas", "LGC", "Jakeinny098", "Rustyshackelford",
    "shabbyroch", "ubershiza", "drbrkln", "elys123", "bossdriver",
    "HerbyHerb", "Jim1985", "Malik38", "STIDRIVER", "vxlon7", "Waqar",
    "tohunt4me", "DogPound", "SuliB", "AlBrklyn", "John Cunningham",
    "MReeves", "PinkFoot", "alextheboss", "luisannalui",
    "censoredbytheFCC", "KONY", "cieru", "Jorlev", "Smooth954",
    "marcusguber", "nyc321", "Tony from New Jersey", "Vanstaal", "Bkrah",
    "brunoamat2", "gebbels6", "Kevin7889", "uanic", "Uber OG",
    "UberKilledMyMarriage", "ya mon its me", "HunkAWestchester",
    "Mr Affinito", "ninja warrior", "NoNonsense", "notacabdriver",
    "Notauberhater", "TwoFiddyMile", "bilyvh", "cybertec69",
    "JohnnyBlanco", "SOBE", "ubernyc"
  ),
  uber_write = rep_len(
    c(
      "I see people post about getting a w",
      "you have 2 choices either you drive",
      "More than a year ago I didnt drive ",
      "yeah i stopped driving for them for",
      "Ive been getting some promotions la",
      "FuberNYC saidIve been getting some ",
      "shumacker saidAnd You feel importan",
      "FuberNYC saidIve been getting some ",
      "They start coming after few months "
    ),
    79L
  ),
  stringsAsFactors = FALSE
)

そして、ここでの答えです:

# Remove every user name found in data$uber_name from the post text in
# data$uber_write, writing the cleaned text to a new column `uber_write2`.

# Deduplicate and drop NA / "" so paste0() cannot inject a literal "NA" or
# an empty alternative into the pattern.
names_to_strip <- unique(as.character(data$uber_name))
names_to_strip <- names_to_strip[
  !is.na(names_to_strip) & nzchar(names_to_strip)
]
# Longest names first, so a name that is a prefix of another one cannot be
# preferred by the regex engine and leave a partial name behind.
names_to_strip <- names_to_strip[
  order(nchar(names_to_strip), decreasing = TRUE)
]
# Backslash-escape regex metacharacters so each name is matched literally.
names_to_strip <- gsub(
  "([\\\\.^$|?*+()\\[\\]{}])", "\\\\\\1", names_to_strip,
  perl = TRUE
)

# One alternation (name1|name2|...) lets a single vectorised gsub() call
# process all rows.
dont_want <- paste0(names_to_strip, collapse = "|")
data$uber_write2 <- gsub(
  pattern = dont_want, replacement = "", x = data$uber_write
)
関連する問題