2017-06-20 4 views
0

私がスクレイピングしているウェブページでは、インラインJavaScriptがデータを生成し、対応するdivに電話番号を挿入します。Webページのソースには電話番号が表示されないため、XPathやBeautiful Soupなどを使ってデータをスクレイピングすることはできません。ページソースを分析したところ、データはページソース内の<script>タグ内のJavaScriptでのみ生成・保持されていることが分かりました。Pythonを使用して、このインラインJavaScriptからデータを抽出したいと考えています。

<script> 
QuidditaEnvironment.CurrentContactData={"ContactInfoName":null,"PhoneNumber1":"064/005-3708","PhoneNumber2":null,"City":null,"Address":"","Email":"[email protected]","ShowOtherContactData":false,"ShowContactPhone":true,"ShowMyAdsPage":false,"Advertiser":{"DisplayName":"korisnik-404772","PartyType":1,"Comment":null,"CreatedAt":"2014-10-27T10:31:01","LastModifiedAt":"2014-10-27T10:31:01","ActivatedAt":"2014-10-27T10:32:13","IsDeleted":false,"IsHost":false,"ProfileDescription":null,"AccountBalance":0.0,"AccountBalanceString":null,"AvatarImageURL":null,"IsAvatarValid":false,"AvatarImages":null,"ContactInfos":[{"Name":"Glavni","Address":"Beograd,Zage Malivuk 2 008","ZipCode":null,"City":null,"Country":null,"Phone1":null,"Phone2":null,"Longitude":null,"Latitude":null,"Email":"[email protected]","CreatedAt":"2014-10-27T10:31:01","LastModifiedAt":"2014-10-27T10:32:13","IsPrimary":true,"IsActive":true,"ShowOnMyAdsPage":true,"SequenceNumber":1,"ForHost":false,"Id":404772,"Version":3}],"StateName":"Aktivan","StateId":2,"IsInvestor":false,"IsSchool":false,"PartyGroupIds":[],"PartyGroupNames":[],"Address":null,"AdvertisingParty":true,"BlockAds":false,"NumberOfCVs":0,"EnforceIdentityUniqueness":true,"Slug":"korisnik-404772","Email":null,"SuppressAutomaticVerification":false,"Id":404772,"Version":4},"Latitudes":[],"Longitudes":[],"InvestorRoute":"nekretnine/novogradnja/korisnik-404772/projekti","UserAdsRoute":null,"SchoolRoute":"oglasi/korisnik-404772/posao/kursevi-i-obuke-pretraga","AvatarUrl":null,"NumberInRegister":null,"WebAddress":null,"Id":5425435147525,"Version":225,"Guid":"d93c6d04-5c96-4c80-aefb-fa644b1a1351"};QuidditaEnvironment.IsCurrentAdActive=true; 
QuidditaEnvironment.CurrentClassified={"RelativeUrl":"/nekretnine/prodaja-stanova/direktna-prodaja/5425435147525","HasAutomaticRenewal":false,"ValidToProlonged":null,"ShowInUnifiedAdvertiserAdList":true,"IsUsedMoveToTop":false,"ExpiresWithin48Hours":false,"UniqueId":"5425435147525_4","Id":"5425435147525","AdKindId":"4","IsPromoted":false,"IsInterestingInternal":false,"IsInterestingExternal":false,"InterestingEntryDate":null,"AdKindCode":"Premium","AdKindPosition":1,"StateId":101,"StoppageReasonIds":null,"StoppageReasonDescription":null,"Version":0,"Stamp":"2017-06-19T14:33:50.655Z","AdvertiserId":"404772","Title":"DIREKTNA PRODAJA","Text":"Hrastov parket,PVC stolarija,sigurnosna vrata,hodnik,spavaca soba,kupatilo,kuhinja sa prirodnom ventilacijom,dnevni boravak..klima..","TextHtml":"<p>Hrastov parket,PVC stolarija,sigurnosna vrata,hodnik,spavaca soba,kupatilo,kuhinja sa prirodnom ventilacijom,dnevni boravak..klima..</p>","PrintText":"","ContactInfoName":null,"PhoneNumber1":"+381640053708","PhoneNumber2":null,"Email":"True","Address":"","City":null,"ValidFrom":"2017-06-16T13:10:11Z","ValidFromForDisplay":"2017-06-16T13:10:11Z","ValidFromProlonged":null,"ValidTo":"2017-06-23T13:10:11Z","LastPublished":"2017-06-16T13:10:11Z","IsFirstOfKind":false,"CreatedAt":"2015-12-28T15:58:21Z","LastModifiedAt":"2017-06-16T13:09:42Z","IsArchived":false,"GeoLocationRPT":"44.801593,20.527653","ImageCount":6,"ImageURLs":["/slike/oglasi/Originals/160228/direktna-prodaja-uknjizen-stan-5425435147525-71779653263.jpg","/slike/oglasi/Originals/160228/direktna-prodaja-uknjizen-stan-5425435147525-71779653261.jpg","/slike/oglasi/Originals/160228/direktna-prodaja-uknjizen-stan-5425435147525-71779653262.jpg","/slike/oglasi/Originals/160228/direktna-prodaja-uknjizen-stan-5425435147525-71779653264.jpg","/slike/oglasi/Originals/160228/direktna-prodaja-uknjizen-stan-5425435147525-71779653265.jpg","/slike/oglasi/Originals/160228/direktna-prodaja-uknjizen-stan-5425435147525-71779653266.jpg"],"ImageText
s":["","","","","",""],"CategoryIds":[1,2,2001,12],"CategoryId":12,"CategoryHierarchyId":"1.2.2001.12.","CategoryNames":["Nekretnine","Stambeni prostor","Prodaja","Stan"],"CategoryFullName":"Nekretnine>Stambeni prostor>Prodaja>Stan","AdvertiserLogoUrlInternal":null,"AdvertiserLogoUrl":null,"VideoUrl":null,"CreatedByUserId":404772,"DeclarationId":404772,"EnclosureFilePath":null,"ListHTML":"&lt;div class=&quot;col-md-12 col-sm-12 col-xs-12 col-lg-12&quot;&gt;&lt;div class=&quot;row&quot;&gt;&lt;div class=&quot;product-item product-list-item Premium real-estates my-ad-placeholder&quot; data-id=&quot;5425435147525&quot; id=&quot;5425435147525&quot;&gt;&lt;div class=&quot;my-ad-sticker&quot;&gt;&lt;/div&gt;&lt;div class=&quot;small-arrow-map-ad&quot;&gt;&lt;/div&gt;&lt;div class=&quot;central-feature&quot;&gt;&lt;span data-value=&quot;31.000&quot;&gt;&lt;i&gt;31.000&amp;nbsp;€&lt;/i&gt;&lt;/span&gt;&lt;div class=&quot;price-shadow&quot;&gt;&lt;img src=&quot;/Content/assets/frontend/layout/img/price-shadow.png&quot; style=&quot;width:100%; height:10px;&quot; /&gt;&lt;/div&gt;&lt;/div&gt;&lt;div class=&quot;col-md-4 col-sm-5 col-xs-4 col-lg-4&quot;&gt;&lt;div class=&quot;product-type&quot;&gt;&lt;/div&gt;&lt;figure class=&quot;pi-img-wrapper&quot;&gt;&lt;a class=&quot;a-images&quot; href=&quot;/nekretnine/prodaja-stanova/direktna-prodaja/5425435147525&quot;&gt;&lt;img src=&#39;https://img.halooglasi.com//slike/oglasi/Thumbs/160228/m/direktna-prodaja-uknjizen-stan-5425435147525-71779653263.jpg&#39; class=&quot;&quot; alt=&quot;DIREKTNA PRODAJA&quot; onError=&quot;this.onerror = null; this.src = &amp;#39;/Content/Quiddita/Widgets/Product/Stylesheets/img/no-image.jpg&amp;#39;&quot;&gt;&lt;/a&gt;&lt;/figure&gt;&lt;div class=&quot;pi-img-wrapper-under&quot;&gt;&lt;span class=&quot;publish-date&quot;&gt;16.06.2017&lt;/span&gt;&lt;span class=&quot;basic-info&quot;&gt;&lt;span data-field-name=&#39;oglasivac_nekretnine_s&#39; 
data-field-value=&#39;vlasnik&#39;&gt;Vlasnik&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;div class=&quot;clear&quot;&gt;&lt;/div&gt;&lt;/div&gt;&lt;/div&gt;&lt;div class=&quot;col-md-6 col-sm-5 col-xs-6 col-lg-6 sm-margin&quot;&gt;&lt;h3 class=&quot;ad-title&quot;&gt;&lt;a href=&quot;/nekretnine/prodaja-stanova/direktna-prodaja/5425435147525&quot;&gt;DIREKTNA PRODAJA&lt;/a&gt;&lt;/h3&gt;&lt;ul class=&quot;subtitle-places&quot;&gt;&lt;li&gt;Beograd&amp;nbsp;&lt;/li&gt;&lt;li&gt;Opština Zvezdara&amp;nbsp;&lt;/li&gt;&lt;li&gt;Mirijevo&amp;nbsp;&lt;/li&gt;&lt;/ul&gt;&lt;ul class=&quot;ad-features &quot;&gt;&lt;li class=&#39;col-p-1-3&#39;&gt;&lt;div class=&#39;value-wrapper&#39;&gt;Stan&amp;nbsp;&lt;span class=&#39;legend&#39;&gt;Tip nekretnine&lt;/span&gt;&lt;/div&gt;&lt;/li&gt;&lt;li class=&#39;col-p-1-3&#39;&gt;&lt;div class=&#39;value-wrapper&#39;&gt;39&amp;nbsp;m&lt;sup&gt;2&lt;/sup&gt;&lt;span class=&#39;legend&#39;&gt;Kvadratura&lt;/span&gt;&lt;/div&gt;&lt;/li&gt;&lt;li class=&#39;col-p-1-3&#39;&gt;&lt;div class=&#39;value-wrapper&#39;&gt;2.0&amp;nbsp;&lt;span class=&#39;legend&#39;&gt;Broj soba&lt;/span&gt;&lt;/div&gt;&lt;/li&gt;&lt;/ul&gt;&lt;div class=&quot;clear&quot;&gt;&lt;/div&gt;&lt;p class=&quot;text-description-list ad-description short-desc&quot;&gt;Hrastov parket,PVC stolarija,sigurnosna vrata,hodnik,spavaca soba,kupatilo,kuhinja sa prirodnom ventilacijom,dnevni boravak..klima..&lt;/p&gt;&lt;/div&gt;&lt;div class=&quot;btns&quot;&gt;&lt;button type=&quot;button&quot; class=&quot;btn btn-circle btn-fav-ad-star fav-cmd favorite-ad-holder&quot; data-id=&quot;5425435147525&quot;&gt;&lt;/button&gt;&lt;span class=&quot;on-map&quot;&gt;&lt;/span&gt;&lt;div class=&quot;clear&quot;&gt;&lt;/div&gt;&lt;/div&gt;&lt;/div&gt;&lt;/div&gt;&lt;/div&gt;","GridHTML":"&lt;div class=&quot;col-md-6 col-sm-6 col-xs-12&quot;&gt;&lt;div class=&quot;product-item product-grid-item Premium real-estates my-ad-placeholder&quot;&gt;&lt;figure class=&quot;pi-img-wrapper&quot;&gt;&lt;a 
class=&quot;a-images&quot; href=&quot;/nekretnine/prodaja-stanova/direktna-prodaja/5425435147525&quot;&gt;&lt;img src=&#39;https://img.halooglasi.com//slike/oglasi/Thumbs/160228/m/direktna-prodaja-uknjizen-stan-5425435147525-71779653263.jpg&#39; class=&quot;&quot; alt=&quot;DIREKTNA PRODAJA&quot; onError=&quot;this.onerror = null; this.src = &amp;#39;/Content/Quiddita/Widgets/Product/Stylesheets/img/no-image.jpg&amp;#39;&quot;&gt;&lt;/a&gt;&lt;span class=&quot;on-map&quot;&gt;&lt;/span&gt;&lt;div class=&quot;wrap-btn-fav&quot;&gt;&lt;div class=&quot;btn-group btn-group-solid&quot;&gt;&lt;button type=&quot;button&quot; class=&quot;btn btn-circle btn-fav-ad-star fav-cmd favorite-ad-holder&quot; data-id=&quot;5425435147525&quot;&gt;&lt;/button&gt;&lt;/div&gt;&lt;/div&gt;&lt;div class=&quot;product-type&quot;&gt;&lt;/div&gt;&lt;/figure&gt;&lt;div class=&quot;central-feature&quot;&gt;&lt;span data-value=&quot;31.000&quot;&gt;&lt;i&gt;31.000&amp;nbsp;€&lt;/i&gt;&lt;/span&gt;&lt;div class=&quot;price-shadow&quot;&gt;&lt;img src=&quot;/Content/assets/frontend/layout/img/price-shadow.png&quot; style=&quot;width:100%; height:10px;&quot; /&gt;&lt;/div&gt;&lt;/div&gt;&lt;h3 class=&quot;ad-title&quot;&gt;&lt;a href=&quot;/nekretnine/prodaja-stanova/direktna-prodaja/5425435147525&quot;&gt;DIREKTNA PRODAJA&lt;/a&gt;&lt;/h3&gt;&lt;ul class=&quot;subtitle-places&quot;&gt;&lt;li&gt;Beograd&amp;nbsp;&lt;/li&gt;&lt;li&gt;Opština Zvezdara&amp;nbsp;&lt;/li&gt;&lt;li&gt;Mirijevo&amp;nbsp;&lt;/li&gt;&lt;/ul&gt;&lt;p class=&quot;ad-description&quot;&gt;Hrastov parket,PVC stolarija,sigurnosna vrata,hodnik,spavaca soba,kupatilo,kuhinja sa prirodnom ventilacijom,dnevni boravak..klima..&lt;/p&gt;&lt;div class=&quot;clear&quot;&gt;&lt;/div&gt;&lt;div class=&quot;clear&quot;&gt;&lt;/div&gt;&lt;/div&gt;&lt;/div&gt;","DoNotShowContactButton":null,"ContactButtonLink":null,"OtherFields":{"broj_soba_s":"2.0","grejanje_s":"EG","grad_s":"Beograd","lokacija_s":"Opština 
Zvezdara","mikrolokacija_s":"Mirijevo","kvadratura_d":39.0,"oglasivac_nekretnine_s":"Vlasnik","stanje_objekta_s":"Izvorno stanje","tip_nekretnine_s":"Stan","cena_d":31000.0,"dodatno_ss":["Odmah useljiv","Uknjižen"],"ostalo_ss":["Klima","Telefon","KATV","Internet","Interfon","Parking","Francuski balkon"],"tip_objekta_s":"Novogradnja","sprat_s":"3","sprat_od_s":"3","broj_soba_id_l":401,"grejanje_id_l":1543,"grad_id_l":35112,"lokacija_id_l":40788,"mikrolokacija_id_l":531542,"oglasivac_nekretnine_id_l":387237,"stanje_objekta_id_l":260581,"tip_nekretnine_id_l":8100000,"dodatno_id_ls":[12000001,12000004],"ostalo_id_ls":[12100002,12100010,12100011,12100012,12100013,12100017,12100018],"tip_objekta_id_l":387235,"sprat_id_l":12441300,"sprat_od_id_l":12441350,"broj_soba_order_i":4,"sprat_order_i":13,"kvadratura_d_unit_s":"m2","cena_d_unit_s":"EUR","defaultunit_kvadratura_d":39.0,"defaultunit_cena_d":31000.0,"_version_":1570636437658796032},"IsVerificationPending":false,"VerificationStateId":2,"InfoMessage":null,"TotalViews":21098,"TopCategoryCSSClass":null,"JobApplicationCount":0,"ShowAdvertiserAdsLink":false,"ShowMyAvatar":true,"IsOwnedByCurrentUser":false,"ThreeDTourExists":false,"UseRaiffeisenCreditCalculator":false,"CreditInstalment":null,"CreditTotalAmount":null}; for (var i in QuidditaEnvironment.CurrentClassified.OtherFields) { QuidditaEnvironment.CurrentClassified[i] = QuidditaEnvironment.CurrentClassified.OtherFields[i]; }; 
QuidditaEnvironment.IsUserOperator=false 

QuidditaEnvironment.CurrentClassifiedInstances=[{"RelativeUrl":null,"HasAutomaticRenewal":false,"ValidToProlonged":null,"ShowInUnifiedAdvertiserAdList":false,"IsUsedMoveToTop":false,"ExpiresWithin48Hours":false,"UniqueId":null,"Id":null,"AdKindId":"4","IsPromoted":false,"IsInterestingInternal":false,"IsInterestingExternal":false,"InterestingEntryDate":null,"AdKindCode":"Premium","AdKindPosition":1,"StateId":101,"StoppageReasonIds":null,"StoppageReasonDescription":null,"Version":0,"Stamp":"0001-01-01T00:00:00","AdvertiserId":null,"Title":"DIREKTNA PRODAJA","Text":null,"TextHtml":null,"PrintText":null,"ContactInfoName":null,"PhoneNumber1":null,"PhoneNumber2":null,"Email":null,"Address":null,"City":null,"ValidFrom":"2017-06-16T13:10:11Z","ValidFromForDisplay":null,"ValidFromProlonged":null,"ValidTo":"2017-06-23T13:10:11Z","LastPublished":"2017-06-16T13:10:11Z","IsFirstOfKind":false,"CreatedAt":"0001-01-01T00:00:00","LastModifiedAt":null,"IsArchived":false,"GeoLocationRPT":null,"ImageCount":null,"ImageURLs":null,"ImageTexts":null,"CategoryIds":null,"CategoryId":0,"CategoryHierarchyId":null,"CategoryNames":["Nekretnine","Stambeni prostor","Prodaja","Stan"],"CategoryFullName":null,"AdvertiserLogoUrlInternal":null,"AdvertiserLogoUrl":null,"VideoUrl":null,"CreatedByUserId":0,"DeclarationId":null,"EnclosureFilePath":null,"ListHTML":null,"GridHTML":null,"DoNotShowContactButton":null,"ContactButtonLink":null,"OtherFields":null,"IsVerificationPending":false,"VerificationStateId":0,"InfoMessage":null,"TotalViews":0,"TopCategoryCSSClass":null,"JobApplicationCount":0,"ShowAdvertiserAdsLink":false,"ShowMyAvatar":false,"IsOwnedByCurrentUser":false,"ThreeDTourExists":false,"UseRaiffeisenCreditCalculator":false,"CreditInstalment":null,"CreditTotalAmount":null}]</script>

上記が該当のスクリプトです。SeleniumやPhantomJSでは、データのスクレイピングに時間がかかりすぎます。ソースコードからこのインラインJavaScriptを解析してデータを抽出することは可能でしょうか?

答えて

1

可能です。

ただし、信じられないほど面倒でもあります。

単純なケースでは、単純な正規表現チェックを行うことができます。<script>タグの内容を取得して、/[a-z][a-z0-9\.]+\s*=\s*(.*);/のようなものを探します。しかし、その正規表現では、間違いなくすべてのケースを取得することはできません。

そこから先は、他のケースを処理する仕組みを用意したり、JSONを特定して解析したりといったことが必要になります。

すべて終える頃には、JSパーサーをほぼ丸ごと書いてしまっていることでしょう。

SeleniumやPhantomJS以外に取り得る方法の1つは、スクリプトをNodeプロセスに渡して実行し、それが設定するグローバル変数を調べることです。ただし、例のスニペットの場合のように、いくつかのエラーが発生する可能性があります(たとえば、スニペット単体ではQuidditaEnvironmentが定義されていません)。

正直なところ、SeleniumやPhantomJSのようなツールを使い、それを十分に高速化する方法を探ることをお勧めします。

関連する問題