0
特定のサイトからJSONを取得するPythonクローラーを作成しました。ValueError: 二重引用符で囲まれたプロパティ名が必要です: 1行目2列目 (char 1)
データベースに保存できるよう、データを抽出するためのファイルを作成しようとしました。
抽出スクリプトの処理は次のとおりです:
# Pull the dataLayer object out of an inline <script> and massage it toward JSON.
# Takes the text of the script at index 27 of page_ad.findAll('script')
# (presumably a BeautifulSoup document -- confirm) and swaps every single
# quote for a double quote.
s = page_ad.findAll('script')[27].text.replace('\'', '"')
# NOTE(review): '.+' is greedy and DOTALL lets it cross newlines, so this match
# runs from the FIRST '{' to the LAST '}' in the script text. The captured text
# therefore starts with JavaScript function bodies, not with the object literal.
s = re.search(r'\{.+\}', s, re.DOTALL).group() # intended to isolate the JSON data
# NOTE(review): this removes everything from any '//' to the end of the line,
# including '//' inside string values such as URLs.
s = re.sub(r'//.+\n', '', s) # strip '//' line comments
# Removes ALL whitespace, including spaces inside string values and between
# JavaScript keywords.
s = re.sub(r'\s+', '', s) # strip whitespace
s = re.sub(r',}', '}', s) # drop a trailing ',' that directly precedes '}'
結果:
{varsource="".toLowerCase();if(mobileSources.indexOf(source)!=-1){returntrue;}returnfalse;}functiongetSource(){varmsiteSources=["mobile","msite"];varuserAgent=navigator.userAgent.toLowerCase();varsource="".toLowerCase();if(mobileSources.indexOf(source)!=-1){if(msiteSources.indexOf(source)!=-1){source="msite";varresultMatch=userAgent.match(/\olx-source\/(\w+);/);if(resultMatch){source=resultMatch[1];}}}else{source="web";}returnsource;}dataLayer=function(){varinitialDatalayer={"config":{"lurkerURL":"},"site":{"isMobile":isMobile(),"source":getSource()},"page":{"pageType":"ad_detail","detail":{"parent_category_id":"2000","category_id":"2020","state_id":"2","region_id":"31","ad_id":"382568903","list_id":"314710679","city_id":"9238","zipcode":"32606174","price":"19900"},"adDetail":{"adID":"382568903","listID":"314710679","sellerName":"MichelleAlcântara","adDate":"2017-03-1113:10:55","mainCategory":"Veículosebarcos","mainCategoryID":"2000","subCategory":"Carros","subCategoryID":"2020","state":"MG","ddd":"31","region":"BeloHorizonteeregião","price":"19900"}},"session":{"user":{"userID":null,"loginType":null}},"pageType":"Ad_detail","abtestingEnable":"1","listingCategory":"2020","adId":"382568903","state":"2","region":"31","category":"2020","pictures":"5","listId":"314710679","loggedUser":"0","referrer":""};if(self.adParams){for(keyinadParams){varpage=initialDatalayer.page;page.detail[key]=adParams[key];if(page.adDetail){page.adDetail[key]=adParams[key];}}}return[initialDatalayer];}
しかし、これをJSONに変換しようとすると、次のエラーが表示されます。
JSON変換:
# Parse the cleaned-up string as JSON. With the string produced above this
# raises ValueError: the character after the opening '{' is not a
# double-quoted property name.
dataLayer = json.loads(s)
エラーメッセージ:
Traceback (most recent call last):
File "libs/olx/crawler_ads_information.py", line 100, in <module>
run(link_base)
File "libs/olx/crawler_ads_information.py", line 38, in run
information = getVehicleInformation(page_ad)
File "libs/olx/crawler_ads_information.py", line 49, in getVehicleInformation
dataLayer = json.loads(s)
File "/usr/lib/python2.7/json/__init__.py", line 339, in loads
return _default_decoder.decode(s)
File "/usr/lib/python2.7/json/decoder.py", line 364, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "/usr/lib/python2.7/json/decoder.py", line 380, in raw_decode
obj, end = self.scan_once(s, idx)
ValueError: Expecting property name enclosed in double quotes: line 1 column 2 (char 1)
何を見ていますか? – JacobIRR