2017-05-20 18 views
0

私はページからJSON要素を取得しようとしています:ウェブページのスクリプトタグからjson要素を取得するにはどうすればよいですか?

import json 
    import requests 
    import pandas as pd 
    import re 
    from bs4 import BeautifulSoup as bs 

    # Page whose inline <script> holds the `contentAsJson` object.
    url = 'https://www.nemlig.com/varer/dagligvarer/frugt-og-groent/frugt-baer'
    url

    # Download the page and parse it with the stdlib-backed HTML parser.
    page = requests.get(url)
    soupH = bs(page.content, 'html.parser')

    # Every <script> element on the page, in document order.
    scripts = soupH.find_all('script')

    p = re.compile('contentAsJson')

    # Walk all scripts; cn ends up as the number of scripts seen and cont_js
    # as the LAST <script> Tag whose markup mentions contentAsJson
    # (it stays "" when none matches).
    cont_js = ""
    cn = 0
    for cn, sc in enumerate(scripts, start=1):
        if p.search(str(sc)) is not None:
            cont_js = sc

    cont_js

しかし、このJSONスクリプトから(Id、ProductGroupId、CombinedProductsAndSitecoreTimestamp)の値を抽出するにはどうすればよいですか?

cont_jsは次の内容を返します:

var contentAsJson = { "MetaData": {"Id":"f905e457-c461-4750-880b-bbe6f2d5f5eb","Name":"Frugt-baer","DisplayName":"","TemplateId":"25997ff0-7455-4d0f-bd48-3de2f3a5c3d8","TemplateName":"Filter page","Versions":1,"Language":"da","ContentPath":"/Site/Varer/Dagligvarer/Frugt-og-groent/Frugt-baer","FullPath":"/sitecore/content/Site/Varer/Dagligvarer/Frugt-og-groent/Frugt-baer","Path":"/sitecore/content/Site/Varer/Dagligvarer/Frugt-og-groent/Frugt-baer","Url":"https://www.nemlig.com/varer/dagligvarer/frugt-og-groent/frugt-baer","Header":"Frugt og bær","AutoHideMobileNavigation":false,"PageTitle":"Oversigt over frugt og bær - Dagligvarer når det passer dig!","MetaKeyWords":"Frugt og bær","MetaDescription":"Udvalg af frugt og bær hos nemlig.com - Modtag dagligvarer når det passer dig, levering direkte til døren. Køb frugt og bær hos nemlig.com","CategoryPath":null,"BackgroundImageForJson":null,"ResponseCode":200,"NavigationTitle":"Oversigt over frugt og bær - Dagligvarer når det passer dig!","IncludeInNavigation":true,"IncludeInSitemap":true,"TopAdvertisement":null,"LeftAdvertisement":null,"MenuContextId":"f905e457-c461-4750-880b-bbe6f2d5f5eb","TrackingId":"f905e457-c461-4750-880b-bbe6f2d5f5eb","AuthenticationRequired":false}, "Settings": {"BuildVersion":"b64.2.0.0","SitecorePublishedStamp":"4M-2z7ue","ProductsImportedTimestamp":"4BNhsQiH","CombinedProductsAndSitecoreTimestamp":"4BNhsQiH-4M-2z7ue","UserId":null,"ZipCode":"0","DeliveryZoneId":1,"TimeslotUtc":"2017052108-60-600","StaticResourcesPath":"https://live.nemligstatic.com/s/b64.2.0.0/scom/dist"}, 
"content":[{"Id":"ca3b1f74-a65e-41ce-9df7-6d6380d4a895","VisibleOnMobile":true,"TemplateName":"productlistshowallspot","TemplateId":"15135d3e-4a63-478c-ac4d-d3705963ced0","Heading":"Bær","ProductGroupId":"e588f992-c387-44b4-95ed-fc7e385796e7","TotalProducts":11},{"Id":"c6d7c49e-7d54-4a6c-917b-2f14725a87d2","VisibleOnMobile":true,"TemplateName":"productlistshowallspot","TemplateId":"15135d3e-4a63-478c-ac4d-d3705963ced0","Heading":"Udskåret frugt","ProductGroupId":"0bba003e-4cd0-4161-ba43-3dd90c0e9095","TotalProducts":5},{"Id":"9f7d5644-c3f0-4cd2-8324-8d23530cb95d","VisibleOnMobile":true,"TemplateName":"productlistshowallspot","TemplateId":"15135d3e-4a63-478c-ac4d-d3705963ced0","Heading":"Eksotisk frugt","ProductGroupId":"14069d07-9bf1-4651-ba04-744da98ea9d6","TotalProducts":20},{"Id":"7e0c336f-78b1-41d6-beab-b02b824a22d0","VisibleOnMobile":true,"TemplateName":"productlistshowallspot","TemplateId":"15135d3e-4a63-478c-ac4d-d3705963ced0","Heading":"Druer, kiwi og stenfrugter","ProductGroupId":"6c777f89-fbb2-4d33-b170-6e2586009a16","TotalProducts":12},{"Id":"62e71c2b-1feb-4771-a7e0-2cbfc70a7326","VisibleOnMobile":true,"TemplateName":"productlistshowallspot","TemplateId":"15135d3e-4a63-478c-ac4d-d3705963ced0","Heading":"Meloner","ProductGroupId":"131a255e-437b-43f5-97b6-0e214ab33a0f","TotalProducts":10},{"Id":"ad2dc012-ad6f-458a-91a2-8770bdcdaa0c","VisibleOnMobile":true,"TemplateName":"productlistshowallspot","TemplateId":"15135d3e-4a63-478c-ac4d-d3705963ced0","Heading":"Citrusfrugt","ProductGroupId":"ea195351-c44a-4158-85f0-4cfbb890978b","TotalProducts":11},{"Id":"3aa05883-2d23-4e1c-8bb1-67acdf3cfc17","VisibleOnMobile":true,"TemplateName":"productlistshowallspot","TemplateId":"15135d3e-4a63-478c-ac4d-d3705963ced0","Heading":"Pærer","ProductGroupId":"dc604784-c871-4d0e-bc88-2e6ceb8ac044","TotalProducts":4},{"Id":"0cd2cb71-5ace-479b-ad69-aefb01100bbd","VisibleOnMobile":true,"TemplateName":"productlistshowallspot","TemplateId":"15135d3e-4a63-478c-ac4d-d3705963ced0","Heading
":"Bananer","ProductGroupId":"97e40d96-bbee-4a32-89e8-d2d05e522b7e","TotalProducts":4},{"Id":"b815f5ef-2d2a-44f6-bedd-0704101b1836","VisibleOnMobile":true,"TemplateName":"productlistshowallspot","TemplateId":"15135d3e-4a63-478c-ac4d-d3705963ced0","Heading":"Æbler","ProductGroupId":"183ad2d6-6917-4268-84d7-3718f0dcc27a","TotalProducts":8},{"Id":"174835ea-c986-480c-b2ad-303cf129da12","VisibleOnMobile":true,"TemplateName":"productlistonerowspot","TemplateId":"17e165e8-b722-4da5-a0c8-8881a4ce4e8c","Heading":"Frossen frugt","SeeMoreLink":{"Anchor":"","Class":"","Text":"Se al frossen frugt","Query":"","Title":"Se al frossen frugt","Url":"/varer/dagligvarer/frost/groent-frugt-frost/frugt-og-baer","Target":"","TargetId":"cd577472-03d9-4ddc-8fc6-632e6d72b30c","Type":"internal"},"ProductGroupId":"426f6f21-91e5-4101-9867-0e7a2aa29bd4","TotalProducts":19},{"Id":"0b855f72-b3e8-4bd1-bc23-61cbda4c7002","TemplateName":"recipelistonerowspot","TemplateId":"f804ee41-029a-4dfe-9247-4fe8c421e637","Heading":"Årstidens frugt- og grøntpakker","SeeMoreLink":{"Anchor":"","Class":"","Text":"Aarstidens-frugt-og-groent-pakker","Query":"","Title":"","Url":"/varer/dagligvarer/frugt-og-groent/aarstidens-frugt-og-groent-pakker","Target":"","TargetId":"72c3abce-d57a-45db-9dcb-39ad103a848e","Type":"internal"},"RecipeGroupId":"a222d64e-a5f1-44ed-8dbd-21b8f643ef63","ContextId":"baer","TotalRecipes":6}], "aside":[ ] };
+0

あなたはJSON.parse(jsonString)を使用することができます。 Pythonでは、これはjson.loads(json_string) – fen1x

+0

ありがとうございます。それは可能ですが、json_stringにJSONだけが含まれていることが前提になります。では、上記のcont_jsが返す文字列(string)からJSON部分をどのように抽出すればよいですか? –

答えて

0

あなたが示した例から理解する限り、次のような内容を含む文字列cont_jsがあります:

'var contentAsJson = {...encoded string...}' 

JSONエンコードされた文字列だけを取得するには、それを切り出します:

# cont_js may be a bs4 Tag (which cannot be sliced) or already a plain string,
# so get the script's text first.
script_text = cont_js if isinstance(cont_js, str) else cont_js.string
# Keep only the {...} object literal: this drops the leading
# "var contentAsJson = " and the trailing ";" that json.loads would reject.
json_string = script_text[script_text.index('{'):script_text.rindex('}') + 1]

JSONをデコードします:

# Parse the JSON text into Python objects. The text is a JSON object, so the
# result is a dict (not a string, despite the variable name).
decoded_string = json.loads(json_string) 

そして、あなたのデータにアクセスすることができますJSで

# Page-level Id from the "MetaData" section.
# NOTE: the name `id` shadows Python's builtin id(); kept for compatibility.
id = decoded_string['MetaData']['Id']
# Combined timestamp from the "Settings" section.
combined_timestamp = decoded_string['Settings']['CombinedProductsAndSitecoreTimestamp']
# One ProductGroupId per entry of the "content" list; entries without that key
# (e.g. recipe spots that carry RecipeGroupId instead) are skipped.
product_group_ids = [
    item['ProductGroupId']
    for item in decoded_string['content']
    if 'ProductGroupId' in item
]
+0

bs4.element.Tag –

+0

TypeError: unhashable type: 'slice' が発生しました。cont_jsはbs4.element.Tagです –

+0

もし私がそれを文字列にしようとすると、cont_js.stringのような文字列が得られます: –

関連する問題