2017-01-19 9 views
5

すべて、リソースクエリ実行中に超過しました。 BigQuery

私は、BigQueryを使用して分析を実行するためのより大きなコードを取得しようとしていましたが、多くの状態と存在するデータの量について問題は続いています。私たちは何年ものデータについて話しています。私のクエリは嗅ぎどころではないかもしれませんが、特定のグループに基づいて合計を取得しようとするのは、私が必要とするものです。

リソースがエラーを超えないようにするために、クエリ内で何を変更する必要がありますか?

SELECT 
    COMPANY_NAME, 
    RATING_CLASS, 
    COMPANY_KEY, 
    -- State Info & Calculations 
    -- Over is used as a WINDOW function to SUM ALL results within the given query 
    SUM(ZIP5_MED_SUPP_TOOL_NUM_QUOTE) OVER() AS STATE_MED_SUPP_TOOL_NUM_QUOTE, 
    -- ZIP3 Info & Calculations 
    ZIP3, 
    ZIP3_MED_SUPP_TOOL_NUM_QUOTE AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE, 
    ZIP3_TOTAL_RESULT_APPEARANCE, 
    ZIP3_LOWEST_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_LOWEST, 
    ZIP3_AVG_RATIO_TO_LOWEST AS ZIP3_AVG_RATIO_TO_LOWEST, 
    ZIP3_AVG_RANK AS ZIP3_AVG_RANK, 
    ZIP3_TOP5_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP5, 
    ZIP3_TOP10_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP10, 
    ZIP3_AVG_CENT_DIFF AS ZIP3_AVG_CENT_DIFF, 
    ZIP3_DISCOUNTED_LOWEST_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_LOWEST, 
    ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST, 
    ZIP3_DISCOUNTED_AVG_RANK AS ZIP3_DISCOUNTED_AVG_RANK, 
    ZIP3_DISCOUNTED_TOP5_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP5, 
    ZIP3_DISCOUNTED_TOP10_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP10, 
    ZIP3_DISCOUNTED_AVG_CENT_DIFF AS ZIP3_DISCOUNTED_AVG_CENT_DIFF, 
    -- ZIP5 Info & Calculations 
    ZIP5, 
    ZIP5_MED_SUPP_TOOL_NUM_QUOTE AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE, 
    ZIP5_TOTAL_RESULT_APPEARANCE, 
    ZIP5_LOWEST_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_LOWEST, 
    ZIP5_AVG_RATIO_TO_LOWEST AS ZIP5_AVG_RATIO_TO_LOWEST, 
    ZIP5_AVG_RANK AS ZIP5_AVG_RANK, 
    ZIP5_TOP5_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP5, 
    ZIP5_TOP10_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP10, 
    ZIP5_AVG_CENT_DIFF AS ZIP5_AVG_CENT_DIFF, 
    ZIP5_DISCOUNTED_LOWEST_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_LOWEST, 
    ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST, 
    ZIP5_DISCOUNTED_AVG_RANK AS ZIP5_DISCOUNTED_AVG_RANK, 
    ZIP5_DISCOUNTED_TOP5_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP5, 
    ZIP5_DISCOUNTED_TOP10_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP10, 
    ZIP5_DISCOUNTED_AVG_CENT_DIFF AS ZIP5_DISCOUNTED_AVG_CENT_DIFF, 
FROM (
    SELECT 
    COMPANY_NAME, 
    COMPANY_KEY, 
    RATING_CLASS, 
    -- ZIP3 
    ZIP3, 
    COUNT(DISTINCT logging_key) OVER (PARTITION BY ZIP3) AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE, 
    COUNT(*) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_TOTAL_RESULT_APPEARANCE, 
    SUM(CASE 
     WHEN lowest = TRUE THEN 1 
     ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_LOWEST_COUNT, 
    AVG(discounted_ratio_to_min) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_AVG_RATIO_TO_LOWEST, 
    AVG(discounted_rate_order) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_AVG_RANK, 
    SUM(CASE 
     WHEN top5 = TRUE THEN 1 
     ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_TOP5_COUNT, 
    SUM(CASE 
     WHEN top10 = TRUE THEN 1 
     ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_TOP10_COUNT, 
    AVG(discounted_cent_diff) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_AVG_CENT_DIFF, 
    SUM(CASE 
     WHEN DISCOUNTED_lowest = TRUE THEN 1 
     ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_LOWEST_COUNT, 
    AVG(discounted_ratio_to_min) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST, 
    AVG(discounted_rate_order) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_AVG_RANK, 
    SUM(CASE 
     WHEN DISCOUNTED_top5 = TRUE THEN 1 
     ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_TOP5_COUNT, 
    SUM(CASE 
     WHEN DISCOUNTED_top10 = TRUE THEN 1 
     ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_TOP10_COUNT, 
    AVG(discounted_cent_diff) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_AVG_CENT_DIFF, 
    -- ZIP5 
    ZIP5, 
    COUNT(DISTINCT logging_key) OVER (PARTITION BY ZIP5) AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE, 
    COUNT(*) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_TOTAL_RESULT_APPEARANCE, 
    SUM(CASE 
     WHEN lowest = TRUE THEN 1 
     ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_LOWEST_COUNT, 
    AVG(discounted_ratio_to_min) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_AVG_RATIO_TO_LOWEST, 
    AVG(discounted_rate_order) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_AVG_RANK, 
    SUM(CASE 
     WHEN top5 = TRUE THEN 1 
     ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_TOP5_COUNT, 
    SUM(CASE 
     WHEN top10 = TRUE THEN 1 
     ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_TOP10_COUNT, 
    AVG(discounted_cent_diff) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_AVG_CENT_DIFF, 
    SUM(CASE 
     WHEN DISCOUNTED_lowest = TRUE THEN 1 
     ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_LOWEST_COUNT, 
    AVG(discounted_ratio_to_min) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST, 
    AVG(discounted_rate_order) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_AVG_RANK, 
    SUM(CASE 
     WHEN DISCOUNTED_top5 = TRUE THEN 1 
     ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_TOP5_COUNT, 
    SUM(CASE 
     WHEN DISCOUNTED_top10 = TRUE THEN 1 
     ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_TOP10_COUNT, 
    AVG(discounted_cent_diff) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_AVG_CENT_DIFF, 
    FROM 
    [csgapi:qh_med_supp_tool.v2_TX] 
    WHERE 
    SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z') 
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T00:00:00.000Z') 
    AND user_key NOT IN ("6522869941010432", "6277136540237824") 
    GROUP BY 
    COMPANY_NAME, 
    COMPANY_KEY, 
    RATING_CLASS, 
    ZIP3, 
    ZIP5, 
    LOWEST, 
    RATIO_TO_MIN, 
    RATE_ORDER, 
    TOP5, 
    TOP10, 
    CENT_DIFF, 
    DISCOUNTED_LOWEST, 
    DISCOUNTED_RATIO_TO_MIN, 
    DISCOUNTED_RATE_ORDER, 
    DISCOUNTED_TOP5, 
    DISCOUNTED_TOP10, 
    DISCOUNTED_CENT_DIFF, 
    LOGGING_KEY) 
GROUP BY 
    COMPANY_NAME, 
    COMPANY_KEY, 
    RATING_CLASS, 
    -- ZIP3 General 
    ZIP3, 
    ZIP3_MED_SUPP_TOOL_NUM_QUOTE, 
    ZIP3_TOTAL_RESULT_APPEARANCE, 
    ZIP3_LOWEST, 
    ZIP3_AVG_RATIO_TO_LOWEST, 
    ZIP3_AVG_RANK, 
    ZIP3_TOP5, 
    ZIP3_TOP10, 
    ZIP3_AVG_CENT_DIFF, 
    ZIP3_DISCOUNTED_LOWEST, 
    ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST, 
    ZIP3_DISCOUNTED_AVG_RANK, 
    ZIP3_DISCOUNTED_TOP5, 
    ZIP3_DISCOUNTED_TOP10, 
    ZIP3_DISCOUNTED_AVG_CENT_DIFF, 
    -- ZIP5 General 
    ZIP5, 
    ZIP5_MED_SUPP_TOOL_NUM_QUOTE, 
    ZIP5_TOTAL_RESULT_APPEARANCE, 
    ZIP5_LOWEST, 
    ZIP5_AVG_RATIO_TO_LOWEST, 
    ZIP5_AVG_RANK, 
    ZIP5_TOP5, 
    ZIP5_TOP10, 
    ZIP5_AVG_CENT_DIFF, 
    ZIP5_DISCOUNTED_LOWEST, 
    ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST, 
    ZIP5_DISCOUNTED_AVG_RANK, 
    ZIP5_DISCOUNTED_TOP5, 
    ZIP5_DISCOUNTED_TOP10, 
    ZIP5_DISCOUNTED_AVG_CENT_DIFF 

提案の修正と更新クエリ:BigQueryのは、共有リソースは、BigQueryのは、クエリが必要とし、それに応じてリソースを割り当てるリソースの量を近似している という事実に:

SELECT 
    main.COMPANY_NAME AS COMPANY_NAME, 
    main.COMPANY_KEY AS COMPANY_KEY, 
    main.RATING_CLASS AS RATING_CLASS, 
    state_count.STATE_MED_SUPP_TOOL_NUM_QUOTE AS STATE_MED_SUPP_TOOL_NUM_QUOTE, 
    -- ZIP3 
    main.ZIP3 AS ZIP3, 
    ZIP3_COUNT.ZIP3_MED_SUPP_TOOL_NUM_QUOTE AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE, 
    ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE, 
    ZIP3_SUB.ZIP3_AVG_RATIO_TO_LOWEST AS ZIP3_AVG_RATIO_TO_LOWEST, 
    ZIP3_SUB.ZIP3_TOP5_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP5, 
    ZIP3_SUB.ZIP3_LOWEST_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_LOWEST, 
    ZIP3_SUB.ZIP3_TOP10_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP10, 
    ZIP3_SUB.ZIP3_AVG_RANK AS ZIP3_AVG_RANK, 
    ZIP3_SUB.ZIP3_AVG_CENT_DIFF AS ZIP3_AVG_CENT_DIFF, 
    ZIP3_SUB.ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST, 
    ZIP3_SUB.ZIP3_DISCOUNTED_TOP5_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP5, 
    ZIP3_SUB.ZIP3_DISCOUNTED_LOWEST_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_LOWEST, 
    ZIP3_SUB.ZIP3_DISCOUNTED_TOP10_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP10, 
    ZIP3_SUB.ZIP3_DISCOUNTED_AVG_RANK AS ZIP3_DISCOUNTED_AVG_RANK, 
    ZIP3_SUB.ZIP3_DISCOUNTED_AVG_CENT_DIFF AS ZIP3_DISCOUNTED_AVG_CENT_DIFF, 
    -- ZIP5 
    main.ZIP5 AS ZIP5, 
    ZIP5_COUNT.ZIP5_MED_SUPP_TOOL_NUM_QUOTE AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE, 
    ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE, 
    ZIP5_SUB.ZIP5_AVG_RATIO_TO_LOWEST AS ZIP5_AVG_RATIO_TO_LOWEST, 
    ZIP5_SUB.ZIP5_TOP5_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP5, 
    ZIP5_SUB.ZIP5_LOWEST_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_LOWEST, 
    ZIP5_SUB.ZIP5_TOP10_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP10, 
    ZIP5_SUB.ZIP5_AVG_RANK AS ZIP5_AVG_RANK, 
    ZIP5_SUB.ZIP5_AVG_CENT_DIFF AS ZIP5_AVG_CENT_DIFF, 
    ZIP5_SUB.ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST, 
    ZIP5_SUB.ZIP5_DISCOUNTED_TOP5_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP5, 
    ZIP5_SUB.ZIP5_DISCOUNTED_LOWEST_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_LOWEST, 
    ZIP5_SUB.ZIP5_DISCOUNTED_TOP10_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP10, 
    ZIP5_SUB.ZIP5_DISCOUNTED_AVG_RANK AS ZIP5_DISCOUNTED_AVG_RANK, 
    ZIP5_SUB.ZIP5_DISCOUNTED_AVG_CENT_DIFF AS ZIP5_DISCOUNTED_AVG_CENT_DIFF, 
FROM (
    SELECT 
    COMPANY_NAME, 
    COMPANY_KEY, 
    RATING_CLASS, 
    ZIP3, 
    ZIP5, 
    STATE, 
    FROM 
    [csgapi:qh_med_supp_tool.v2_TX] 
    WHERE 
    SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z') 
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z') 
    AND user_key NOT IN ("6522869941010432", 
     "6277136540237824", 
     "4872666167115776", 
     "6396348765044736", 
     "6139303562313728", 
     "4988973881491456") 
    AND portal_key NOT IN ("5878607637381120") 
    GROUP BY 
    COMPANY_NAME, 
    COMPANY_KEY, 
    RATING_CLASS, 
    ZIP3, 
    ZIP5, 
    STATE, 
    ) AS MAIN 
LEFT JOIN (
    SELECT 
    ZIP3, 
    COUNT(*) AS ZIP3_TOTAL_RESULT_APPEARANCE, 
    COMPANY_KEY, 
    RATING_CLASS, 
    AVG(discounted_ratio_to_min) AS ZIP3_AVG_RATIO_TO_LOWEST, 
    SUM(CASE 
     WHEN TOP5 = TRUE THEN 1 
     ELSE 0 END) AS ZIP3_TOP5_COUNT, 
    SUM(CASE 
     WHEN LOWEST = TRUE THEN 1 
     ELSE 0 END) AS ZIP3_LOWEST_COUNT, 
    SUM(CASE 
     WHEN TOP10 = TRUE THEN 1 
     ELSE 0 END) AS ZIP3_TOP10_COUNT, 
    AVG(discounted_rate_order) AS ZIP3_AVG_RANK, 
    AVG(discounted_cent_diff) AS ZIP3_AVG_CENT_DIFF, 
    AVG(discounted_ratio_to_min) AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST, 
    SUM(CASE 
     WHEN DISCOUNTED_TOP5 = TRUE THEN 1 
     ELSE 0 END) AS ZIP3_DISCOUNTED_TOP5_COUNT, 
    SUM(CASE 
     WHEN DISCOUNTED_LOWEST = TRUE THEN 1 
     ELSE 0 END) AS ZIP3_DISCOUNTED_LOWEST_COUNT, 
    SUM(CASE 
     WHEN DISCOUNTED_TOP10 = TRUE THEN 1 
     ELSE 0 END) AS ZIP3_DISCOUNTED_TOP10_COUNT, 
    AVG(discounted_rate_order) AS ZIP3_DISCOUNTED_AVG_RANK, 
    AVG(discounted_cent_diff) AS ZIP3_DISCOUNTED_AVG_CENT_DIFF, 
    FROM 
    [csgapi:qh_med_supp_tool.v2_TX] 
    WHERE 
    SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z') 
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z') 
    AND user_key NOT IN ("6522869941010432", 
     "6277136540237824") 
    GROUP BY 
    ZIP3, 
    COMPANY_NAME, 
    COMPANY_KEY, 
    RATING_CLASS, 
    ) AS ZIP3_SUB 
ON 
    main.ZIP3 = ZIP3_SUB.ZIP3 
    AND main.COMPANY_KEY = ZIP3_SUB.COMPANY_KEY 
    AND main.RATING_CLASS = ZIP3_SUB.RATING_CLASS 
LEFT JOIN (
    SELECT 
    ZIP3, 
    EXACT_COUNT_DISTINCT(logging_key) AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE 
    FROM 
    [csgapi:qh_med_supp_tool.v2_TX] 
    WHERE 
    SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z') 
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z') 
    AND user_key NOT IN ("6522869941010432", 
     "6277136540237824") 
    GROUP BY 
    ZIP3) AS ZIP3_COUNT 
ON 
    main.ZIP3 = ZIP3_COUNT.ZIP3 
LEFT JOIN (
    SELECT 
    ZIP5, 
    COUNT(*) AS ZIP5_TOTAL_RESULT_APPEARANCE, 
    COMPANY_KEY, 
    RATING_CLASS, 
    AVG(discounted_ratio_to_min) AS ZIP5_AVG_RATIO_TO_LOWEST, 
    SUM(CASE 
     WHEN TOP5 = TRUE THEN 1 
     ELSE 0 END) AS ZIP5_TOP5_COUNT, 
    SUM(CASE 
     WHEN LOWEST = TRUE THEN 1 
     ELSE 0 END) AS ZIP5_LOWEST_COUNT, 
    SUM(CASE 
     WHEN TOP10 = TRUE THEN 1 
     ELSE 0 END) AS ZIP5_TOP10_COUNT, 
    AVG(discounted_rate_order) AS ZIP5_AVG_RANK, 
    AVG(discounted_cent_diff) AS ZIP5_AVG_CENT_DIFF, 
    AVG(discounted_ratio_to_min) AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST, 
    SUM(CASE 
     WHEN DISCOUNTED_TOP5 = TRUE THEN 1 
     ELSE 0 END) AS ZIP5_DISCOUNTED_TOP5_COUNT, 
    SUM(CASE 
     WHEN DISCOUNTED_LOWEST = TRUE THEN 1 
     ELSE 0 END) AS ZIP5_DISCOUNTED_LOWEST_COUNT, 
    SUM(CASE 
     WHEN DISCOUNTED_TOP10 = TRUE THEN 1 
     ELSE 0 END) AS ZIP5_DISCOUNTED_TOP10_COUNT, 
    AVG(discounted_rate_order) AS ZIP5_DISCOUNTED_AVG_RANK, 
    AVG(discounted_cent_diff) AS ZIP5_DISCOUNTED_AVG_CENT_DIFF, 
    FROM 
    [csgapi:qh_med_supp_tool.v2_TX] 
    WHERE 
    SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z') 
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z') 
    AND user_key NOT IN ("6522869941010432", 
     "6277136540237824") 
    GROUP BY 
    ZIP5, 
    COMPANY_NAME, 
    COMPANY_KEY, 
    RATING_CLASS, 
    ) AS ZIP5_SUB 
ON 
    main.ZIP5 = ZIP5_SUB.ZIP5 
    AND main.COMPANY_KEY = ZIP5_SUB.COMPANY_KEY 
    AND main.RATING_CLASS = ZIP5_SUB.RATING_CLASS 
LEFT JOIN (
    SELECT 
    ZIP5, 
    EXACT_COUNT_DISTINCT(logging_key) AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE 
    FROM 
    [csgapi:qh_med_supp_tool.v2_TX] 
    WHERE 
    SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z') 
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z') 
    AND user_key NOT IN ("6522869941010432", 
     "6277136540237824") 
    GROUP BY 
    ZIP5) AS ZIP5_COUNT 
ON 
    main.ZIP5 = ZIP5_COUNT.ZIP5 
LEFT JOIN (
    SELECT 
    STATE, 
    EXACT_COUNT_DISTINCT(logging_key) AS STATE_MED_SUPP_TOOL_NUM_QUOTE 
    FROM 
    [csgapi:qh_med_supp_tool.v2_TX] 
    WHERE 
    SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z') 
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z') 
    AND user_key NOT IN ("6522869941010432", 
     "6277136540237824") 
    GROUP BY 
    STATE) AS STATE_COUNT 
ON 
    main.STATE = STATE_COUNT.STATE 

説明。同じテーブルの複数の結合にクエリを分割することにより、計算は当初よりも多くのリソースによって実行できます。クエリを制限するのがなぜうまく機能するかについての技術的な説明は、スタックオーバーフローの応答​​のJordan Tiganiを参照してください。

+0

このクエリはおおよそ何ギガバイトの処理をしていますか? –

+0

私はあなたの質問がなぜ高価なのか、いくつか考えています。その理由の1つは、過剰に設計されている可能性があるためです。しかし、あなたがここで達成しようとしていることを正確に(論理的に)明確に把握していなければ、最終的な結論を下すのは難しく、判断に間違いが生じやすくなります。私は、あなたがこの質問で何を得ようとしているのかを詳細に説明することをお勧めします。私たちが今のように盲目になるのを助けることができます:o) –

答えて

2

何とか私はあなたが必要としている以下のように感じます。私は本当にあなたのロジックをリバースエンジニアリングするだけの比較的盲目の試みであると間違っている可能性がありますので、私が間違っている場合は重く判断してはいけない、ここ
それをテストすることはできませんが、私は実際に

SELECT 
    main.COMPANY_NAME AS COMPANY_NAME, 
    main.COMPANY_KEY AS COMPANY_KEY, 
    main.RATING_CLASS AS RATING_CLASS, 
    SUM(ZIP5_MED_SUPP_TOOL_NUM_QUOTE) OVER() AS STATE_MED_SUPP_TOOL_NUM_QUOTE, 
    -- ZIP3 
    main.ZIP3 AS ZIP3, 
    ZIP3_MED_SUPP_TOOL_NUM_QUOTE AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE, 
    ZIP3_TOTAL_RESULT_APPEARANCE, 
    ZIP3_LOWEST_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_LOWEST, 
    ZIP3_AVG_RATIO_TO_LOWEST AS ZIP3_AVG_RATIO_TO_LOWEST, 
    ZIP3_AVG_RANK AS ZIP3_AVG_RANK, 
    ZIP3_TOP5_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP5, 
    ZIP3_TOP10_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP10, 
    ZIP3_AVG_CENT_DIFF AS ZIP3_AVG_CENT_DIFF, 
    ZIP3_DISCOUNTED_LOWEST_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_LOWEST, 
    ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST, 
    ZIP3_DISCOUNTED_AVG_RANK AS ZIP3_DISCOUNTED_AVG_RANK, 
    ZIP3_DISCOUNTED_TOP5_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP5, 
    ZIP3_DISCOUNTED_TOP10_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP10, 
    ZIP3_DISCOUNTED_AVG_CENT_DIFF AS ZIP3_DISCOUNTED_AVG_CENT_DIFF, 
    -- ZIP5 
    main.ZIP5 AS ZIP5, 
    ZIP5_MED_SUPP_TOOL_NUM_QUOTE AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE, 
    ZIP5_TOTAL_RESULT_APPEARANCE, 
    ZIP5_LOWEST_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_LOWEST, 
    ZIP5_AVG_RATIO_TO_LOWEST AS ZIP5_AVG_RATIO_TO_LOWEST, 
    ZIP5_AVG_RANK AS ZIP5_AVG_RANK, 
    ZIP5_TOP5_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP5, 
    ZIP5_TOP10_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP10, 
    ZIP5_AVG_CENT_DIFF AS ZIP5_AVG_CENT_DIFF, 
    ZIP5_DISCOUNTED_LOWEST_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_LOWEST, 
    ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST, 
    ZIP5_DISCOUNTED_AVG_RANK AS ZIP5_DISCOUNTED_AVG_RANK, 
    ZIP5_DISCOUNTED_TOP5_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP5, 
    ZIP5_DISCOUNTED_TOP10_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP10, 
    ZIP5_DISCOUNTED_AVG_CENT_DIFF AS ZIP5_DISCOUNTED_AVG_CENT_DIFF 
FROM (
    SELECT COMPANY_NAME, COMPANY_KEY, RATING_CLASS, ZIP3, ZIP5 
    FROM [csgapi:qh_med_supp_tool.v2_TX] 
    WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z') 
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T00:00:00.000Z') 
    AND user_key NOT IN ("6522869941010432", "6277136540237824") 
    GROUP BY COMPANY_NAME, COMPANY_KEY, RATING_CLASS, ZIP3, ZIP5 
) AS main 
LEFT JOIN (
    SELECT 
    ZIP3, company_key, rating_class, 
    COUNT(*)             AS ZIP3_TOTAL_RESULT_APPEARANCE, 
    SUM(CASE WHEN lowest = TRUE THEN 1 ELSE 0 END)   AS ZIP3_LOWEST_COUNT, 
    AVG(discounted_ratio_to_min)        AS ZIP3_AVG_RATIO_TO_LOWEST, 
    AVG(discounted_rate_order)        AS ZIP3_AVG_RANK, 
    SUM(CASE WHEN top5 = TRUE THEN 1 ELSE 0 END)    AS ZIP3_TOP5_COUNT, 
    SUM(CASE WHEN top10 = TRUE THEN 1 ELSE 0 END)    AS ZIP3_TOP10_COUNT, 
    AVG(discounted_cent_diff)         AS ZIP3_AVG_CENT_DIFF, 
    SUM(CASE WHEN DISCOUNTED_lowest = TRUE THEN 1 ELSE 0 END) AS ZIP3_DISCOUNTED_LOWEST_COUNT, 
    AVG(discounted_ratio_to_min)        AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST, 
    AVG(discounted_rate_order)        AS ZIP3_DISCOUNTED_AVG_RANK, 
    SUM(CASE WHEN DISCOUNTED_top5 = TRUE THEN 1 ELSE 0 END) AS ZIP3_DISCOUNTED_TOP5_COUNT, 
    SUM(CASE WHEN DISCOUNTED_top10 = TRUE THEN 1 ELSE 0 END) AS ZIP3_DISCOUNTED_TOP10_COUNT, 
    AVG(discounted_cent_diff)         AS ZIP3_DISCOUNTED_AVG_CENT_DIFF, 
    FROM [csgapi:qh_med_supp_tool.v2_TX] 
    WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z') 
     AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T00:00:00.000Z') 
     AND user_key NOT IN ("6522869941010432", "6277136540237824") 
    GROUP BY ZIP3, company_key, rating_class 
) AS zip3_sub 
ON main.ZIP3 = zip3_sub.ZIP3 AND main.company_key = zip3_sub.company_key AND main.rating_class = zip3_sub.rating_class 
LEFT JOIN ( 
    SELECT 
    ZIP5, company_key, rating_class, 
    COUNT(*)             AS ZIP5_TOTAL_RESULT_APPEARANCE, 
    SUM(CASE WHEN lowest = TRUE THEN 1 ELSE 0 END)   AS ZIP5_LOWEST_COUNT, 
    AVG(discounted_ratio_to_min)        AS ZIP5_AVG_RATIO_TO_LOWEST, 
    AVG(discounted_rate_order)        AS ZIP5_AVG_RANK, 
    SUM(CASE WHEN top5 = TRUE THEN 1 ELSE 0 END)    AS ZIP5_TOP5_COUNT, 
    SUM(CASE WHEN top10 = TRUE THEN 1 ELSE 0 END)    AS ZIP5_TOP10_COUNT, 
    AVG(discounted_cent_diff)         AS ZIP5_AVG_CENT_DIFF, 
    SUM(CASE WHEN DISCOUNTED_lowest = TRUE THEN 1 ELSE 0 END) AS ZIP5_DISCOUNTED_LOWEST_COUNT, 
    AVG(discounted_ratio_to_min)        AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST, 
    AVG(discounted_rate_order)        AS ZIP5_DISCOUNTED_AVG_RANK, 
    SUM(CASE WHEN DISCOUNTED_top5 = TRUE THEN 1 ELSE 0 END) AS ZIP5_DISCOUNTED_TOP5_COUNT, 
    SUM(CASE WHEN DISCOUNTED_top10 = TRUE THEN 1 ELSE 0 END) AS ZIP5_DISCOUNTED_TOP10_COUNT, 
    AVG(discounted_cent_diff)         AS ZIP5_DISCOUNTED_AVG_CENT_DIFF, 
    FROM [csgapi:qh_med_supp_tool.v2_TX] 
    WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z') 
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T00:00:00.000Z') 
    AND user_key NOT IN ("6522869941010432", "6277136540237824") 
    GROUP BY ZIP5, company_key, rating_class 
) AS zip5_sub 
ON main.ZIP5 = zip5_sub.ZIP5 AND main.company_key = zip5_sub.company_key AND main.rating_class = zip5_sub.rating_class 
LEFT JOIN (
    SELECT ZIP3, COUNT(DISTINCT logging_key) AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE 
    FROM [csgapi:qh_med_supp_tool.v2_TX] 
    WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z') 
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T00:00:00.000Z') 
    AND user_key NOT IN ("6522869941010432", "6277136540237824") 
    GROUP BY ZIP3 
) AS zip3_count 
ON main.ZIP3 = zip3_count.ZIP3 
LEFT JOIN (
    SELECT ZIP5, COUNT(DISTINCT logging_key) AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE 
    FROM [csgapi:qh_med_supp_tool.v2_TX] 
    WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z') 
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T00:00:00.000Z') 
    AND user_key NOT IN ("6522869941010432", "6277136540237824") 
    GROUP BY ZIP5 
) AS zip5_count 
ON main.ZIP5 = zip5_count.ZIP5 
を成功するために安価になるだろうと感じ

また、BigQueryでは、Legacy SQL - COUNT(DISTINCT)関数は確率的です - 統計的近似を与え、正確であるとは限りません。
代わりEXACT_COUNT_DISTINCT()機能を使用することができます - この1つはあなたの正確な数が、バックエンドに少し高価なを与える

そしてもちろん、全体のクエリは、COUNT(DISTINCT)は、正確な数を生成し、標準のSQLがあるときBigQuery Standard SQLのために書き換えることができます。 BigQueryチームの推奨事項を使用することをお勧めします

+0

WOW。あなたは私の心を吹き飛ばした。私はBigQueryの観点から、リソースをどのように割り当てているのかと思いますが、これは完全に意味があります。これを私たちと共有していただきありがとうございます。私はあなたが提案した変更を考慮に入れて更新されたクエリを掲示しました。それは働いている。平均実行時間は86秒から30秒に短縮されました。 – SmittySmee

関連する問題