「新規」ユーザーと「再訪(リピート)」ユーザーを日別に集計する問題を解こうとしました。私の試みは次のとおりです。
-- Daily traffic summary: distinct users per session_day, the change in that
-- number versus the previous day in the data, and how many of the day's users
-- are new (first day they appear) or returning (seen before, but absent on
-- the immediately preceding day that has any traffic).
--
-- Users are classified once per user per day, so several sessions by one user
-- on a day, or sessions of different users interleaved within a day, cannot
-- change the new/returning counts.
-- NOTE(review): assumes session_day carries no time-of-day component, since
-- rows are grouped on the raw value -- confirm.
with user_day as (
    -- Collapse sessions to one row per (day, user).
    select distinct
        session_day,
        user_id
    from user_traffic
),
day_ranked as (
    -- Number the days that have traffic 1, 2, 3, ... in date order.
    select
        session_day,
        user_id,
        dense_rank() over (order by session_day) as day_seq
    from user_day
),
user_visit as (
    -- Day number of the same user's previous active day;
    -- null when this is the first day the user appears.
    select
        session_day,
        user_id,
        day_seq,
        lag(day_seq) over (partition by user_id order by session_day) as prev_day_seq
    from day_ranked
)
select
    session_day,
    -- user_day is already distinct per user, so a plain count is a distinct count
    count(user_id) as user_cnt,
    -- null on the first day: there is no earlier day to compare with
    count(user_id) - lag(count(user_id)) over (order by session_day) as gain,
    -- no earlier active day for this user => new
    count(case when prev_day_seq is null then 1 end) as newu,
    -- previous active day is older than the day just before this one => returning
    count(case when prev_day_seq < day_seq - 1 then 1 end) as returnu
from user_visit
group by session_day
order by session_day;
テストデータと出力:
-- Fixture table and sample rows for the daily new/returning user query.
create table user_traffic (
    session_id  number(6),
    session_day date,
    user_id     number(6),
    product_id  number(6)
);

-- Explicit column lists keep these inserts correct if the table's columns
-- are later added to or reordered.
insert into user_traffic (session_id, session_day, user_id, product_id)
values (1, date '2016-09-07', 101, 1);
insert into user_traffic (session_id, session_day, user_id, product_id)
values (2, date '2016-09-07', 101, 4);
insert into user_traffic (session_id, session_day, user_id, product_id)
values (3, date '2016-09-07', 102, 1);
insert into user_traffic (session_id, session_day, user_id, product_id)
values (4, date '2016-09-08', 101, 2);
insert into user_traffic (session_id, session_day, user_id, product_id)
values (5, date '2016-09-08', 101, 4);
insert into user_traffic (session_id, session_day, user_id, product_id)
values (6, date '2016-09-09', 102, 1);
insert into user_traffic (session_id, session_day, user_id, product_id)
values (7, date '2016-09-10', 102, 1);
insert into user_traffic (session_id, session_day, user_id, product_id)
values (8, date '2016-09-10', 103, 3);
SESSION_DAY        CNT       GAIN        NEW    RETURNS
----------- ---------- ---------- ---------- ----------
2016-09-07           2     (null)          2          0  -- 101 & 102 are new
2016-09-08           1         -1          0          0
2016-09-09           1          0          0          1  -- 102 returned
2016-09-10           2          1          1          0  -- 103 is new
私にはかなりしっかりしているように見えます... – JohnHC
顧客を「新規」や「離脱」などと識別する根拠は何ですか? あなたが提示したテーブルの4つの列だけに基づくのですか? – mathguy
ユーザーが初回ユーザーかリピートユーザーかを識別する方法は他にありません... 問題の「新規」の部分で混乱しています... – Teja