2017-05-19 6 views
0

特定のサイトからjsonを取得するPythonクローラーを作成しました。ValueError: Expecting property name enclosed in double quotes: line 1 column 2 (char 1)(プロパティ名は二重引用符で囲まれている必要があります)

データベースに保存できるように、データを抽出するファイルを作成しようとしました。

抽出スクリプトの関数(実行後の結果はその下に示します):

s = page_ad.findAll('script')[27].text.replace('\'', '"') 
s = re.search(r'\{.+\}', s, re.DOTALL).group() # get json data 
s = re.sub(r'//.+\n', '', s) # replace comment 
s = re.sub(r'\s+', '', s) # strip whitspace 
s = re.sub(r',}', '}', s) # get rid of last , in the dict 

結果:

{varsource="".toLowerCase();if(mobileSources.indexOf(source)!=-1){returntrue;}returnfalse;}functiongetSource(){varmsiteSources=["mobile","msite"];varuserAgent=navigator.userAgent.toLowerCase();varsource="".toLowerCase();if(mobileSources.indexOf(source)!=-1){if(msiteSources.indexOf(source)!=-1){source="msite";varresultMatch=userAgent.match(/\olx-source\/(\w+);/);if(resultMatch){source=resultMatch[1];}}}else{source="web";}returnsource;}dataLayer=function(){varinitialDatalayer={"config":{"lurkerURL":"},"site":{"isMobile":isMobile(),"source":getSource()},"page":{"pageType":"ad_detail","detail":{"parent_category_id":"2000","category_id":"2020","state_id":"2","region_id":"31","ad_id":"382568903","list_id":"314710679","city_id":"9238","zipcode":"32606174","price":"19900"},"adDetail":{"adID":"382568903","listID":"314710679","sellerName":"MichelleAlcântara","adDate":"2017-03-1113:10:55","mainCategory":"Veículosebarcos","mainCategoryID":"2000","subCategory":"Carros","subCategoryID":"2020","state":"MG","ddd":"31","region":"BeloHorizonteeregião","price":"19900"}},"session":{"user":{"userID":null,"loginType":null}},"pageType":"Ad_detail","abtestingEnable":"1","listingCategory":"2020","adId":"382568903","state":"2","region":"31","category":"2020","pictures":"5","listId":"314710679","loggedUser":"0","referrer":""};if(self.adParams){for(keyinadParams){varpage=initialDatalayer.page;page.detail[key]=adParams[key];if(page.adDetail){page.adDetail[key]=adParams[key];}}}return[initialDatalayer];} 

しかし、これをJSONに変換しようとすると、次のエラーが表示されます。

JSON変換:

dataLayer = json.loads(s) 

エラーメッセージ:

Traceback (most recent call last): 
    File "libs/olx/crawler_ads_information.py", line 100, in <module> 
    run(link_base) 
    File "libs/olx/crawler_ads_information.py", line 38, in run 
    information = getVehicleInformation(page_ad) 
    File "libs/olx/crawler_ads_information.py", line 49, in getVehicleInformation 
    dataLayer = json.loads(s) 
    File "/usr/lib/python2.7/json/__init__.py", line 339, in loads 
    return _default_decoder.decode(s) 
    File "/usr/lib/python2.7/json/decoder.py", line 364, in decode 
    obj, end = self.raw_decode(s, idx=_w(s, 0).end()) 
    File "/usr/lib/python2.7/json/decoder.py", line 380, in raw_decode 
    obj, end = self.scan_once(s, idx) 
ValueError: Expecting property name enclosed in double quotes: line 1 column 2 (char 1) 
+0

何を見ていますか? – JacobIRR

答えて

0

JSONは直列化されたデータ構造にすぎず、普通のJavaScriptコードではありません。Pythonの辞書に「変換」できるのは、次のような形式のものです:

{"key" : value, "key2" : "value2_string"} 

これは有効なJSONです。

loadsにしようとしている文字列は単なるjavascriptコードです。

これは `print s` を実行するだけで確認できます。JSONについての詳細は http://json.org/ を参照してください。

関連する問題