以下に、data.table を使用したソリューションをいくつか紹介します。
# Example input 1: one row per (user_id, product_id) pair. The first_order and
# last_order columns are later matched against table2$order_number.
table1 <- data.frame(
  user_id = rep(c(1, 2), times = c(10, 2)),
  product_id = c(14, 24, 38, 40, 66, 2, 19, 30, 71, 98, 7, 16),
  first_order = c(1, 2, 1, 4, 5, 3, 2, 4, 2, 4, 2, 3),
  last_order = c(4, 7, 5, 8, 8, 3, 4, 7, 5, 9, 4, 5)
)

# Example input 2: one row per (user_id, order_number) pair carrying the
# days_cumsum value that the solutions below look up.
# as.numeric() keeps order_number a double column, like the other columns.
table2 <- data.frame(
  user_id = rep(c(1, 2), times = c(12, 6)),
  order_number = as.numeric(c(1:12, 1:6)),
  days_cumsum = c(
    0, 7, 15, 26, 34, 43, 53, 59, 66, 74, 82, 91,
    5, 11, 17, 24, 29, 35
  )
)
# Load data.table, which provides setDT(), the X[Y, on = ...] join syntax,
# `:=`, setnames(), melt() and dcast() used in the rest of this script.
library(data.table)
# Convert both data.frames to data.tables by reference (no copy is made and
# no reassignment is needed).
setDT(table1)
setDT(table2)
# Auto-print table1 to show the first input table.
table1
#> user_id product_id first_order last_order
#> 1: 1 14 1 4
#> 2: 1 24 2 7
#> 3: 1 38 1 5
#> 4: 1 40 4 8
#> 5: 1 66 5 8
#> 6: 1 2 3 3
#> 7: 1 19 2 4
#> 8: 1 30 4 7
#> 9: 1 71 2 5
#> 10: 1 98 4 9
#> 11: 2 7 2 4
#> 12: 2 16 3 5
# Auto-print table2 to show the second input table.
table2
#> user_id order_number days_cumsum
#> 1: 1 1 0
#> 2: 1 2 7
#> 3: 1 3 15
#> 4: 1 4 26
#> 5: 1 5 34
#> 6: 1 6 43
#> 7: 1 7 53
#> 8: 1 8 59
#> 9: 1 9 66
#> 10: 1 10 74
#> 11: 1 11 82
#> 12: 1 12 91
#> 13: 2 1 5
#> 14: 2 2 11
#> 15: 2 3 17
#> 16: 2 4 24
#> 17: 2 5 29
#> 18: 2 6 35
# Solution 1: look up days_cumsum once for first_order and once for
# last_order, then combine both lookups and take the difference.

# Inner join (nomatch = 0 drops unmatched rows) of table1 with table2 where
# first_order equals order_number; the matched days_cumsum becomes dayMin.
DayMin <- table1[
  table2,
  on = .(user_id, first_order = order_number),
  nomatch = 0
]
setnames(DayMin, old = "days_cumsum", new = "dayMin")

# Same lookup for last_order; the matched days_cumsum becomes dayMax.
DayMax <- table1[
  table2,
  on = .(user_id, last_order = order_number),
  nomatch = 0
]
setnames(DayMax, old = "days_cumsum", new = "dayMax")

# Bring dayMin and dayMax together on the full set of table1 columns.
res <- DayMin[
  DayMax,
  on = c("user_id", "product_id", "first_order", "last_order")
]

# Add diff and drop the two helper columns in a single `:=` call.
res[, `:=`(
  diff = dayMax - dayMin,
  dayMax = NULL,
  dayMin = NULL
)]

# The trailing [] prints the result after the by-reference update.
res[]
#> user_id product_id first_order last_order diff
#> 1: 1 2 3 3 0
#> 2: 1 14 1 4 26
#> 3: 1 19 2 4 19
#> 4: 1 38 1 5 34
#> 5: 1 71 2 5 27
#> 6: 1 24 2 7 46
#> 7: 1 30 4 7 27
#> 8: 1 40 4 8 33
#> 9: 1 66 5 8 25
#> 10: 1 98 4 9 40
#> 11: 2 7 2 4 13
#> 12: 2 16 3 5 12
2 つの結合を「パイプのように」チェーンで連結したバージョン(列名の変更なし):
# Solution 2: the same computation as one chained expression.
# Step 1 joins on first_order and keeps table2's days_cumsum.
# Step 2 joins on last_order; the clashing column from table2 is exposed as
# i.days_cumsum, so no renaming is required.
# Step 3 adds diff and removes both helper columns; the final [] prints.
table1[
  table2,
  on = .(user_id, first_order = order_number),
  nomatch = 0
][
  table2,
  on = .(user_id, last_order = order_number),
  nomatch = 0
][
  ,
  c("diff", "days_cumsum", "i.days_cumsum") :=
    list(i.days_cumsum - days_cumsum, NULL, NULL)
][]
#> user_id product_id first_order last_order diff
#> 1: 1 2 3 3 0
#> 2: 1 14 1 4 26
#> 3: 1 19 2 4 19
#> 4: 1 38 1 5 34
#> 5: 1 71 2 5 27
#> 6: 1 24 2 7 46
#> 7: 1 30 4 7 27
#> 8: 1 40 4 8 33
#> 9: 1 66 5 8 25
#> 10: 1 98 4 9 40
#> 11: 2 7 2 4 13
#> 12: 2 16 3 5 12
整形(melt / dcast)を使用して、マージを 1 回のみにしたバージョン:
# Solution 3: reshape so that table2 has to be joined only once.

# Stack first_order / last_order into a single order_number column (the
# original column name is kept in `variable`), then inner-join table2 once.
# id.vars is spelled out in full and uses column names: the abbreviated `id`
# only worked through partial argument matching, and positional indices
# silently break if table1's column order changes.
tab <- melt(
  table1,
  id.vars = c("user_id", "product_id"),
  value.name = "order_number"
)[table2, on = .(user_id, order_number), nomatch = 0]

# Back to one row per (user_id, product_id): for each of the two value
# columns dcast() creates one column per level of `variable`.
res <- dcast(
  tab,
  user_id + product_id ~ variable,
  value.var = c("order_number", "days_cumsum"),
  sep = "#"
)

# Replace the generated "<value.var>#<variable>" names positionally.
setnames(
  res,
  c("user_id", "product_id", "first_order", "last_order", "dayMin", "dayMax")
)

# calculate diff and delete the helper columns
res[, c("diff", "dayMax", "dayMin") := list(dayMax - dayMin, NULL, NULL)]

# The trailing [] prints the result after the by-reference update.
res[]
#> user_id product_id first_order last_order diff
#> 1: 1 2 3 3 0
#> 2: 1 14 1 4 26
#> 3: 1 19 2 4 19
#> 4: 1 24 2 7 46
#> 5: 1 30 4 7 27
#> 6: 1 38 1 5 34
#> 7: 1 40 4 8 33
#> 8: 1 66 5 8 25
#> 9: 1 71 2 5 27
#> 10: 1 98 4 9 40
#> 11: 2 7 2 4 13
#> 12: 2 16 3 5 12
与えられた例の場合、期待される出力は何ですか? –
申し訳ありません、コードに誤りがありました。 –
@RonakShah エラーを修正するよう私が編集しました。編集後の table2 を使用できます。コード内の関数を使用すると、新しい特徴量の値を取得できます。期待される出力:table1 に新たに追加される各特徴量の値 [5] –