これを解決する1つの方法は、analytical functionsです。例として:
data
はあなたの
ga_sessionsデータ(私は 'ユーザー' と 'fullvisitoridを' という名前)のシミュレーションである
#standardSQL
WITH data AS(
select '1' as user, '1' as visitid, '20170520' as date, ARRAY<STRUCT<hitNumber INT64, eventInfo STRUCT<eventCategory STRING> >> [STRUCT(1 as hitNumber, STRUCT('event1' as eventCategory) as eventInfo)] hits UNION ALL
select '1' as user, '2' as visitid, '20170521' as date, ARRAY<STRUCT<hitNumber INT64, eventInfo STRUCT<eventCategory STRING> >> [STRUCT(1 as hitNumber, STRUCT('' as eventCategory) as eventInfo)] hits UNION ALL
select '1' as user, '3' as visitid, '20170522' as date, ARRAY<STRUCT<hitNumber INT64, eventInfo STRUCT<eventCategory STRING> >> [STRUCT(1 as hitNumber, STRUCT('event2' as eventCategory) as eventInfo)] hits UNION ALL
select '1' as user, '4' as visitid, '20170523' as date, ARRAY<STRUCT<hitNumber INT64, eventInfo STRUCT<eventCategory STRING> >> [STRUCT(1 as hitNumber, STRUCT('' as eventCategory) as eventInfo)] hits UNION ALL
select '2' as user, '1' as visitid, '20170520' as date, ARRAY<STRUCT<hitNumber INT64, eventInfo STRUCT<eventCategory STRING> >> [STRUCT(1 as hitNumber, STRUCT('event1' as eventCategory) as eventInfo)] hits UNION ALL
select '2' as user, '2' as visitid, '20170521' as date, ARRAY<STRUCT<hitNumber INT64, eventInfo STRUCT<eventCategory STRING> >> [STRUCT(1 as hitNumber, STRUCT('event2' as eventCategory) as eventInfo)] hits UNION ALL
select '2' as user, '3' as visitid, '20170522' as date, ARRAY<STRUCT<hitNumber INT64, eventInfo STRUCT<eventCategory STRING> >> [STRUCT(1 as hitNumber, STRUCT('' as eventCategory) as eventInfo)] hits union all
select '3' as user, '1' as visitid, '20170520' as date, ARRAY<STRUCT<hitNumber INT64, eventInfo STRUCT<eventCategory STRING> >> [STRUCT(1 as hitNumber, STRUCT('event1' as eventCategory) as eventInfo)] hits UNION ALL
select '3' as user, '2' as visitid, '20170521' as date, ARRAY<STRUCT<hitNumber INT64, eventInfo STRUCT<eventCategory STRING> >> [STRUCT(1 as hitNumber, STRUCT('' as eventCategory) as eventInfo)] hits UNION ALL
select '3' as user, '3' as visitid, '20170522' as date, ARRAY<STRUCT<hitNumber INT64, eventInfo STRUCT<eventCategory STRING> >> [STRUCT(1 as hitNumber, STRUCT('' as eventCategory) as eventInfo)] hits
)
SELECT
user,
visitid,
date
FROM(
SELECT
user,
visitid,
date,
MIN(CASE WHEN hits.eventInfo.eventCategory = 'event1' THEN date END) OVER(PARTITION BY user) min_date,
MAX(CASE WHEN hits.eventInfo.eventCategory = 'event2' THEN date END) OVER(PARTITION BY user) max_date
FROM data,
UNNEST(hits) hits
)
WHERE date BETWEEN min_date AND max_date
。
これは、与えられたユーザーは日付1、日付2のための明確なイベントを持つことができます(そう、それはそれぞれMIN
とMAX
を取っている)という仮定を作り、それはあなたがあなたのイベントことを考えると(eventCategory
フィールドにイベントを保存していることを前提としていセッションレベルで「ダウンロード」と「購入」が定義されている場合は、フィールドをhits.eventInfo.eventCategory
の代わりに使用することをお勧めします。私が作ったの仮定があなたのデータと一致していない場合は、あなたが欲しいものを照会するためにこれらの技術を適応させることができ
SELECT
user,
ARRAY(SELECT AS STRUCT visitid, date FROM UNNEST(user_data) WHERE date BETWEEN min_date AND max_date) user_data
FROM(
SELECT
user,
ARRAY_AGG((SELECT AS STRUCT visitid, date)) user_data,
MIN(CASE WHEN EXISTS(SELECT 1 FROM UNNEST(hits) hits WHERE hits.eventInfo.eventCategory = 'event1') then date END) min_date,
MAX(CASE WHEN EXISTS(SELECT 1 FROM UNNEST(hits) hits WHERE hits.eventInfo.eventCategory = 'event2') THEN date END) max_date
FROM data
GROUP BY user
)
WHERE ARRAY_LENGTH(ARRAY(SELECT AS STRUCT visitid, date FROM UNNEST(user_data) WHERE date BETWEEN min_date AND max_date)) > 0
:分析機能以外
は、あなたはまた、標準SQLのバージョンのARRAYs and STRUCTsで動作することができます。また、シミュレートされたデータをテスト目的で使用することもできます(データセットに合わせて調整することもできます)。
ありがとう@ウィルこれが助けになりました! :) –