2016-12-01 19 views
1

あるサイトをスクレイピングしていて、Pythonの正規表現を使ってJSコード内のdata変数に入っているJSONを抽出したいと考えています。つまり、正規表現を使って2つの文字列の間にある文字列を抽出したいのです。

<script type="text/javascript"> 
P.when('A').register("ImageBlockATF", function(A){ 
    var data = { 
       'colorImages': { 'initial': [{"hiRes":"https://images-na.ssl-images-amazon.com/images/I/81Oo79kGp2L._SL1500_.jpg","thumb":"https://images-na.ssl-images-amazon.com/images/I/41SnVVzKChL._SS40_.jpg","large":"https://images-na.ssl-images-amazon.com/images/I/41SnVVzKChL.jpg","main":{"https://images-na.ssl-images-amazon.com/images/I/81Oo79kGp2L._SY355_.jpg":[355,270],"https://images-na.ssl-images-amazon.com/images/I/81Oo79kGp2L._SY450_.jpg":[450,342],"https://images-na.ssl-images-amazon.com/images/I/81Oo79kGp2L._SY550_.jpg":[550,419],"https://images-na.ssl-images-amazon.com/images/I/81Oo79kGp2L._SY606_.jpg":[606,461],"https://images-na.ssl-images-amazon.com/images/I/81Oo79kGp2L._SY679_.jpg":[679,517]},"variant":"MAIN","lowRes":null},{"hiRes":"https://images-na.ssl-images-amazon.com/images/I/81%2BGc-r4gLL._SL1500_.jpg","thumb":"https://images-na.ssl-images-amazon.com/images/I/416rXB0xcmL._SS40_.jpg","large":"https://images-na.ssl-images-amazon.com/images/I/416rXB0xcmL.jpg","main":{"https://images-na.ssl-images-amazon.com/images/I/81%2BGc-r4gLL._SY355_.jpg":[355,276],"https://images-na.ssl-images-amazon.com/images/I/81%2BGc-r4gLL._SY450_.jpg":[450,349],"https://images-na.ssl-images-amazon.com/images/I/81%2BGc-r4gLL._SX425_.jpg":[547,425],"https://images-na.ssl-images-amazon.com/images/I/81%2BGc-r4gLL._SX466_.jpg":[600,466],"https://images-na.ssl-images-amazon.com/images/I/81%2BGc-r4gLL._SX522_.jpg":[672,522]},"variant":"PT01","lowRes":null},{"hiRes":"https://images-na.ssl-images-amazon.com/images/I/817slrgsGbL._SL1500_.jpg","thumb":"https://images-na.ssl-images-amazon.com/images/I/51gQxeLTYhL._SS40_.jpg","large":"https://images-na.ssl-images-amazon.com/images/I/51gQxeLTYhL.jpg","main":{"https://images-na.ssl-images-amazon.com/images/I/817slrgsGbL._SX355_.jpg":[251,355],"https://images-na.ssl-images-amazon.com/images/I/817slrgsGbL._SX450_.jpg":[318,450],"https://images-na.ssl-images-amazon.com/images/I/817slrgsGbL._SX425_.jpg":[300,425],"https://images-na.ssl-images-a
mazon.com/images/I/817slrgsGbL._SX466_.jpg":[329,466],"https://images-na.ssl-images-amazon.com/images/I/817slrgsGbL._SX522_.jpg":[369,522]},"variant":"PT02","lowRes":null},{"hiRes":"https://images-na.ssl-images-amazon.com/images/I/71r3nXKZBmL._SL1500_.jpg","thumb":"https://images-na.ssl-images-amazon.com/images/I/41d9m8J4MbL._SS40_.jpg","large":"https://images-na.ssl-images-amazon.com/images/I/41d9m8J4MbL.jpg","main":{"https://images-na.ssl-images-amazon.com/images/I/71r3nXKZBmL._SX355_.jpg":[142,355],"https://images-na.ssl-images-amazon.com/images/I/71r3nXKZBmL._SX450_.jpg":[180,450],"https://images-na.ssl-images-amazon.com/images/I/71r3nXKZBmL._SX425_.jpg":[170,425],"https://images-na.ssl-images-amazon.com/images/I/71r3nXKZBmL._SX466_.jpg":[187,466],"https://images-na.ssl-images-amazon.com/images/I/71r3nXKZBmL._SX522_.jpg":[209,522]},"variant":"PT03","lowRes":null},{"hiRes":"https://images-na.ssl-images-amazon.com/images/I/81Uys4ccU4L._SL1500_.jpg","thumb":"https://images-na.ssl-images-amazon.com/images/I/41zh%2BCGamHL._SS40_.jpg","large":"https://images-na.ssl-images-amazon.com/images/I/41zh%2BCGamHL.jpg","main":{"https://images-na.ssl-images-amazon.com/images/I/81Uys4ccU4L._SY355_.jpg":[355,260],"https://images-na.ssl-images-amazon.com/images/I/81Uys4ccU4L._SY450_.jpg":[450,330],"https://images-na.ssl-images-amazon.com/images/I/81Uys4ccU4L._SY550_.jpg":[550,403],"https://images-na.ssl-images-amazon.com/images/I/81Uys4ccU4L._SY606_.jpg":[606,444],"https://images-na.ssl-images-amazon.com/images/I/81Uys4ccU4L._SY679_.jpg":[679,498]},"variant":"PT04","lowRes":null},{"hiRes":"https://images-na.ssl-images-amazon.com/images/I/8179KoLoyGL._SL1500_.jpg","thumb":"https://images-na.ssl-images-amazon.com/images/I/41sMHp-WegL._SS40_.jpg","large":"https://images-na.ssl-images-amazon.com/images/I/41sMHp-WegL.jpg","main":{"https://images-na.ssl-images-amazon.com/images/I/8179KoLoyGL._SY355_.jpg":[355,258],"https://images-na.ssl-images-amazon.com/images/I/8179KoLoyGL._SY450_.jpg
":[450,327],"https://images-na.ssl-images-amazon.com/images/I/8179KoLoyGL._SY550_.jpg":[550,400],"https://images-na.ssl-images-amazon.com/images/I/8179KoLoyGL._SY606_.jpg":[606,441],"https://images-na.ssl-images-amazon.com/images/I/8179KoLoyGL._SY679_.jpg":[679,494]},"variant":"PT05","lowRes":null},{"hiRes":"https://images-na.ssl-images-amazon.com/images/I/71Sw2wrvy6L._SL1364_.jpg","thumb":"https://images-na.ssl-images-amazon.com/images/I/416TFrjOFlL._SS40_.jpg","large":"https://images-na.ssl-images-amazon.com/images/I/416TFrjOFlL.jpg","main":{"https://images-na.ssl-images-amazon.com/images/I/71Sw2wrvy6L._SX355_.jpg":[231,355],"https://images-na.ssl-images-amazon.com/images/I/71Sw2wrvy6L._SX450_.jpg":[293,450],"https://images-na.ssl-images-amazon.com/images/I/71Sw2wrvy6L._SX425_.jpg":[277,425],"https://images-na.ssl-images-amazon.com/images/I/71Sw2wrvy6L._SX466_.jpg":[304,466],"https://images-na.ssl-images-amazon.com/images/I/71Sw2wrvy6L._SX522_.jpg":[340,522]},"variant":"PT06","lowRes":null},{"hiRes":"https://images-na.ssl-images-amazon.com/images/I/61GKBhtPKPL._SL1341_.jpg","thumb":"https://images-na.ssl-images-amazon.com/images/I/41%2BNMI0l9yL._SS40_.jpg","large":"https://images-na.ssl-images-amazon.com/images/I/41%2BNMI0l9yL.jpg","main":{"https://images-na.ssl-images-amazon.com/images/I/61GKBhtPKPL._SX355_.jpg":[190,355],"https://images-na.ssl-images-amazon.com/images/I/61GKBhtPKPL._SX450_.jpg":[240,450],"https://images-na.ssl-images-amazon.com/images/I/61GKBhtPKPL._SX425_.jpg":[227,425],"https://images-na.ssl-images-amazon.com/images/I/61GKBhtPKPL._SX466_.jpg":[249,466],"https://images-na.ssl-images-amazon.com/images/I/61GKBhtPKPL._SX522_.jpg":[279,522]},"variant":"PT07","lowRes":null},{"hiRes":null,"thumb":"https://images-na.ssl-images-amazon.com/images/I/41ziorm06nL._SS40_.jpg","large":"https://images-na.ssl-images-amazon.com/images/I/41ziorm06nL.jpg","main":{"https://images-na.ssl-images-amazon.com/images/I/41ziorm06nL._SY355_.jpg":[355,266],"https://images-
na.ssl-images-amazon.com/images/I/41ziorm06nL._SY450_.jpg":[450,338],"https://images-na.ssl-images-amazon.com/images/I/41ziorm06nL.jpg":[500,375]},"variant":"AW01","lowRes":null},{"hiRes":null,"thumb":"https://images-na.ssl-images-amazon.com/images/I/41lZ6jtPe%2BL._SS40_.jpg","large":"https://images-na.ssl-images-amazon.com/images/I/41lZ6jtPe%2BL.jpg","main":{"https://images-na.ssl-images-amazon.com/images/I/41lZ6jtPe%2BL._SY355_.jpg":[355,266],"https://images-na.ssl-images-amazon.com/images/I/41lZ6jtPe%2BL._SY450_.jpg":[450,338],"https://images-na.ssl-images-amazon.com/images/I/41lZ6jtPe%2BL.jpg":[500,375]},"variant":"AW02","lowRes":null},{"hiRes":null,"thumb":"https://images-na.ssl-images-amazon.com/images/I/51JqQcNGjUL._SS40_.jpg","large":"https://images-na.ssl-images-amazon.com/images/I/51JqQcNGjUL.jpg","main":{"https://images-na.ssl-images-amazon.com/images/I/51JqQcNGjUL._SY355_.jpg":[355,355],"https://images-na.ssl-images-amazon.com/images/I/51JqQcNGjUL._SY450_.jpg":[450,450],"https://images-na.ssl-images-amazon.com/images/I/51JqQcNGjUL._SX425_.jpg":[425,425],"https://images-na.ssl-images-amazon.com/images/I/51JqQcNGjUL._SX466_.jpg":[466,466],"https://images-na.ssl-images-amazon.com/images/I/51JqQcNGjUL.jpg":[500,500]},"variant":"AW03","lowRes":null}]}, 
       'colorToAsin': {'initial': {}}, 
       'holderRatio': 1.0, 
       'holderMaxHeight': 700, 
       'heroImage': {'initial': []}, 
       'weblabs' : {} 
       }; 
    A.trigger('P.AboveTheFold'); // trigger ATF event. 
    return data; 
}); 
</script> 

次の正規表現を試しましたが、うまく動作しません。

(var\s+data\s+=).*^[A.trigger('P.AboveTheFold')]$ 

基本的には、var data = と A.trigger('P.AboveTheFold') の間にある文字列を取得するための正規表現が必要です。

答えて

1

JSONデータに ; が含まれていないと確信できる場合は、次のように書くことができます。

var data\s*=\s*([^;]*}); 

ただし、これはあまり堅牢ではありません。おそらく、きちんとした解析ライブラリ(パーサー)を使用するべきでしょう。JSONデータは第1キャプチャグループに入ります。

こちら(here)を参照してください。

データが var data = と A.trigger('P.AboveTheFold') の間にあると確信できる場合は、次の正規表現を使用することもできます。

(?<=var data =).*(?=A.trigger\('P\.AboveTheFold'\)) 

こちら(there)を参照してください。

肯定のルックアラウンド(先読み・後読み)を使っているため、マッチ全体がそのままJSONデータになります。ただし、これも堅牢ではありません。たとえば、data と = の間の空白の数が異なるだけでマッチしなくなります。また、. を改行にもマッチさせるには、Pythonで re.DOTALL フラグを指定する必要があります。

関連する問題