2017-05-19 10 views
0

無料で公開されているhttparchiveのharテーブルから情報を抽出しようとしています。使用しているJSONPathは、複数のJSONPathバリデーター(http://jsonpath.com/ や jsonpath.curiousconcept.com/)では問題なく動作しますが、BigQueryのJSON_EXTRACT()関数はエラーを返します。例えば下記のペイロードJSONの場合、JSONPath自体は有効であるにもかかわらず、BigQueryのJSON_EXTRACTでは次のエラーが発生します。

Error: JSONPath parse error at: [?(@.name=='Referer')]

-- Legacy SQL: extract the host and the Referer request header from each HAR payload.
-- NOTE: BigQuery reports "JSONPath parse error" at the [?(...)] filter below;
-- the path is kept unchanged because this query reproduces the problem.
SELECT
  JSON_EXTRACT(payload, "$._host") AS host,
  JSON_EXTRACT(payload, "$.request.headers[?(@.name=='Referer')]") AS referer,
  url,
  payload
FROM [httparchive:har.2016_01_01_chrome_requests]
LIMIT 100
下記のペイロードJSONの例に基づくと、期待する戻り値は次のとおりです(現在はエラーになります)。

" http://www.echosdunet.net/ "

ペイロードJSONの例は次のとおりです(私のクエリは上記のとおりです)。

{ 
    "pageref": "page_1_0", 
    "startedDateTime": "2016-01-03T22:18:52.632+00:00", 
    "time": 452, 
    "request": { 
     "method": "GET", 
     "url": "http://disqus.com/embed/comments/?base=default&version=f3e1717b71e7256da258d3a504e56865&f=echosdunet&t_i=node%2F19849&t_u=http%3A%2F%2Fwww.echosdunet.net%2Fnode%2F19849&t_e=Accueil&t_d=Comparatif%20et%20test%20ADSL%20et%20fibre&t_t=Accueil&s_o=default&l=fr", 
     "headersSize": 650, 
     "bodySize": -1, 
     "cookies": [], 
     "headers": [{ 
      "name": "Host", 
      "value": "disqus.com" 
     }, { 
      "name": "Connection", 
      "value": "keep-alive" 
     }, { 
      "name": "Accept", 
      "value": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8" 
     }, { 
      "name": "Upgrade-Insecure-Requests", 
      "value": "1" 
     }, { 
      "name": "User-Agent", 
      "value": "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36 PTST/254" 
     }, { 
      "name": "Referer", 
      "value": "http://www.echosdunet.net/" 
     }, { 
      "name": "Accept-Encoding", 
      "value": "gzip, deflate, sdch" 
     }, { 
      "name": "Accept-Language", 
      "value": "en-US,en;q=0.8" 
     }], 
     "httpVersion": "1.1", 
     "queryString": [{ 
      "name": "base", 
      "value": "default" 
     }, { 
      "name": "version", 
      "value": "f3e1717b71e7256da258d3a504e56865" 
     }, { 
      "name": "f", 
      "value": "echosdunet" 
     }, { 
      "name": "t_i", 
      "value": "node/19849" 
     }, { 
      "name": "t_u", 
      "value": "http://www.echosdunet.net/node/19849" 
     }, { 
      "name": "t_e", 
      "value": "Accueil" 
     }, { 
      "name": "t_d", 
      "value": "Comparatif et test ADSL et fibre" 
     }, { 
      "name": "t_t", 
      "value": "Accueil" 
     }, { 
      "name": "s_o", 
      "value": "default" 
     }, { 
      "name": "l", 
      "value": "fr" 
     }] 
    }, 
    "response": { 
     "status": 200, 
     "statusText": "", 
     "headersSize": 1161, 
     "bodySize": 2017, 
     "headers": [{ 
      "name": "Server", 
      "value": "nginx" 
     }, { 
      "name": "Content-Type", 
      "value": "text/html; charset=utf-8" 
     }, { 
      "name": "Content-Security-Policy", 
      "value": "script-src https://*.twitter.com:* https://api.adsnative.com/v1/ad.json *.adsafeprotected.com https://cas.criteo.com/delivery/0.1/napi.jsonp *.services.disqus.com:* http://referrer.disqus.com/juggler/ disqus.com http://*.twitter.com:* a.disquscdn.com https://referrer.disqus.com/juggler/ https://*.services.disqus.com:* *.moatads.com 'unsafe-eval' https://mobile.adnxs.com/mob https://ssl.google-analytics.com" 
     }, { 
      "name": "Link", 
      "value": "<http://a.disquscdn.com>;rel=preconnect,<http://a.disquscdn.com>;rel=dns-prefetch" 
     }, { 
      "name": "Cache-Control", 
      "value": "stale-if-error=3600, s-stalewhilerevalidate=3600, stale-while-revalidate=30, no-cache, must-revalidate, public, s-maxage=5" 
     }, { 
      "name": "p3p", 
      "value": "CP=\\DSP IDC CUR ADM DELi STP NAV COM UNI INT PHY DEM\\" 
     }, { 
      "name": "Timing-Allow-Origin", 
      "value": "*" 
     }, { 
      "name": "X-Content-Type-Options", 
      "value": "nosniff" 
     }, { 
      "name": "X-XSS-Protection", 
      "value": "1; mode=block" 
     }, { 
      "name": "Last-Modified", 
      "value": "Thu, 11 Jun 2015 13:30:36 GMT" 
     }, { 
      "name": "ETag", 
      "value": "W/\\lounge:view:3840102421.d93d9c4bc037078ffc811833ae267a6f.0\\" 
     }, { 
      "name": "Content-Encoding", 
      "value": "gzip" 
     }, { 
      "name": "Content-Length", 
      "value": "2017" 
     }, { 
      "name": "Accept-Ranges", 
      "value": "bytes" 
     }, { 
      "name": "Date", 
      "value": "Sun, 03 Jan 2016 22:18:51 GMT" 
     }, { 
      "name": "Age", 
      "value": "0" 
     }, { 
      "name": "Connection", 
      "value": "keep-alive" 
     }, { 
      "name": "Vary", 
      "value": "Accept-Encoding" 
     }], 
     "httpVersion": "1.1", 
     "redirectURL": "", 
     "content": { 
      "size": 2017, 
      "mimeType": "text/html" 
     }, 
     "cookies": [] 
    }, 
    "cache": {}, 
    "timings": { 
     "blocked": -1, 
     "dns": 202, 
     "connect": 32, 
     "ssl": -1, 
     "send": 0, 
     "wait": 108, 
     "receive": 110 
    }, 
    "_ip_addr": "104.156.81.134", 
    "_method": "GET", 
    "_host": "disqus.com", 
    "_url": "/embed/comments/?base=default&version=f3e1717b71e7256da258d3a504e56865&f=echosdunet&t_i=node%2F19849&t_u=http%3A%2F%2Fwww.echosdunet.net%2Fnode%2F19849&t_e=Accueil&t_d=Comparatif%20et%20test%20ADSL%20et%20fibre&t_t=Accueil&s_o=default&l=fr", 
    "_responseCode": "200", 
    "_load_ms": "218", 
    "_ttfb_ms": "108", 
    "_load_start": "2632", 
    "_bytesOut": "652", 
    "_bytesIn": "3180", 
    "_objectSize": "2017", 
    "_cacheControl": "stale-if-error=3600, s-stalewhilerevalidate=3600, stale-while-revalidate=30, no-cache, must-revalidate, public, s-maxage=5", 
    "_contentType": "text/html", 
    "_contentEncoding": "gzip", 
    "_type": "3", 
    "_socket": "153", 
    "_score_cache": "-1", 
    "_score_cdn": "-1", 
    "_score_gzip": "100", 
    "_score_cookies": "-1", 
    "_score_keep-alive": "100", 
    "_score_minify": "-1", 
    "_score_combine": "-1", 
    "_score_compress": "-1", 
    "_score_etags": "-1", 
    "_is_secure": "0", 
    "_dns_ms": 202, 
    "_connect_ms": 32, 
    "_ssl_ms": "-1", 
    "_gzip_total": "3180", 
    "_gzip_save": "0", 
    "_minify_total": "0", 
    "_minify_save": "0", 
    "_image_total": "0", 
    "_image_save": "0", 
    "_cache_time": "-1", 
    "_dns_start": "2395", 
    "_dns_end": "2597", 
    "_connect_start": "2600", 
    "_connect_end": "2632", 
    "_ssl_start": "0", 
    "_ssl_end": "0", 
    "_initiator": "http://echosdunet.disqus.com/embed.js?_=1451859532217", 
    "_initiator_line": "16", 
    "_initiator_column": "8205", 
    "_server_count": "4", 
    "_server_rtt": "32", 
    "_client_port": "62284", 
    "_jpeg_scan_count": "0", 
    "_full_url": "http://disqus.com/embed/comments/?base=default&version=f3e1717b71e7256da258d3a504e56865&f=echosdunet&t_i=node%2F19849&t_u=http%3A%2F%2Fwww.echosdunet.net%2Fnode%2F19849&t_e=Accueil&t_d=Comparatif%20et%20test%20ADSL%20et%20fibre&t_t=Accueil&s_o=default&l=fr", 
    "_score_progressive_jpeg": -1, 
    "_body": true, 
    "_load_end": 2850, 
    "_ttfb_start": "2632", 
    "_ttfb_end": 2740, 
    "_download_start": 2740, 
    "_download_end": 2850, 
    "_download_ms": 110, 
    "_all_start": "2395", 
    "_all_end": 2850, 
    "_all_ms": 452, 
    "_index": 55, 
    "_number": 56, 
    "_body_url": "/response_body.php?test=160101_10_KZQ1&run=1&cached=0&request=56" 
} 
RefererのJSONPathを除いてクエリを実行すると、正常に動作します。

-- Legacy SQL: the same extraction with the Referer JSONPath column removed.
-- Only the plain "$._host" path is evaluated by JSON_EXTRACT here.
SELECT
  JSON_EXTRACT(payload, "$._host") AS host,
  url,
  payload
FROM [httparchive:har.2016_01_01_chrome_requests]
LIMIT 100

答えて

1

当面は、(BigQueryの標準SQLで)以下のような方法で対処できます。

#standardSQL
-- CUSTOM_JSON_EXTRACT(json, key)
--   json: JSON text of an array of {"name": ..., "value": ...} objects
--         (here: the output of JSON_EXTRACT(payload, "$.request.headers")).
--   key:  the name to look up; compared as-is against each entry's "name".
--   Returns the "value" of the first entry whose "name" equals key, or NULL
--   when json is not a parseable array or no entry matches.
-- This does the name lookup in JavaScript instead of a JSONPath filter such as
-- [?(@.name=='Referer')].
CREATE TEMPORARY FUNCTION CUSTOM_JSON_EXTRACT(json STRING, key STRING)
RETURNS STRING
LANGUAGE js AS """
  // Best effort: malformed JSON (or any other failure) yields NULL rather
  // than failing the whole query.
  try {
    var entries = JSON.parse(json);
    if (Array.isArray(entries)) {
      // The counter is declared explicitly; an undeclared counter is an
      // implicit global and throws if the code is evaluated in strict mode.
      for (var i = 0; i < entries.length; i++) {
        var entry = entries[i];
        // Skip null/primitive entries instead of aborting the whole scan.
        if (entry && entry.name == key) return entry.value;
      }
    }
  } catch (e) {}
  return null;
""";

-- Host plus the Referer request header for a sample of requests.
SELECT
  JSON_EXTRACT(payload, "$._host") AS host,
  CUSTOM_JSON_EXTRACT(JSON_EXTRACT(payload, "$.request.headers"), 'Referer') AS referer,
  url,
  payload
FROM
  `httparchive.har.2016_01_01_chrome_requests`
LIMIT
  100
+0

ありがとうございます、試してみます。今後はこの種の回避策のせいで、クエリがもっと複雑になりそうですね :) – Kristian

1

JSON_EXTRACTとJSON_EXTRACT_SCALARは、JSONPath要素の完全なセットをサポートしていません。詳細はドキュメントで確認できます。また、標準SQLを使用することをお勧めします。標準SQLのJSON関数の実装のほうが、より分かりやすいエラーメッセージを返すためです。

その他の種類のJSONPath要素のサポートが必要な場合は、機能リクエストの提出を検討してください。

関連する問題