2017-03-04 17 views
-1

Pythonで文字列からすべてのURLを取得する:私は次のような文字列を持っています。

[ 
{ 
"profilechecksum":"58cd944da7b8e647abdcdb722d74fc7ai9060852", 
"userloginstatus":"Last Online today", 
"subscription_text":null, 
"subscription_icon":null, 
"age":"22 Years", 
"username":"ZZXS8433", 
"height":"5' 2\" ", 
"occupation":"Not working", 
"caste":"Namasudra\/Namosudra", 
"income":"No Income", 
"mtongue":"Bengali", 
"edu_level_new":"High School", 
"location":"Kolkata", 
"photo":{ 
"label":null, 
"url":"http:\/\/mediacdn.jeevansathi.com\/866\/17\/17337654-1375139585.jpeg", 
"action":null 
}, 
"size":null, 
"album_count":"1", 
"timetext":null, 
"seen":"Y", 
"religion":"Hindu", 
"gender":"F", 
"featured":null, 
"filter_score":"", 
"filter_reason":"", 
"highlighted":0, 
"verification_seal":null, 
"verification_status":null, 
"mstatus":"Never Married", 
"college":null, 
"pg_college":null, 
"company_name":null, 
"gunascore":null, 
"name_of_user":null, 
"profileid":"9060852", 
"buttonDetails":{ 
"buttons":[ 
{ 
"iconid":"001", 
"label":"Send Interest", 
"action":"INITIATE", 
"value":null, 
"params":null 
}, 
{ 
"iconid":"003", 
"label":"Shortlist", 
"action":"SHORTLIST", 
"value":null, 
"params":"&shortlist=false" 
}, 
{ 
"iconid":"005", 
"label":"Photo", 
"action":"ALBUM", 
"value":"1", 
"params":null 
}, 
{ 
"iconid":"007", 
"label":"Contact", 
"action":"CONTACTDETAIL", 
"value":null, 
"params":null 
} 
], 
"button":null, 
"infomsgiconid":null, 
"infomsglabel":null, 
"infobtnlabel":null, 
"infobtnvalue":null, 
"infobtnaction":null 
}, 
"buttonDetailsJSMS":{ 
"buttons":[ 
{ 
"action":"INITIATE", 
"label":"Send Interest", 
"iconid":null, 
"primary":"true", 
"secondary":null, 
"params":"&stype=A", 
"enable":true, 
"id":"INITIATE" 
}, 
{ 
"iconid":null, 
"label":"View Contacts", 
"action":"CONTACT_DETAIL", 
"value":null, 
"params":null, 
"enable":true, 
"primary":"true", 
"secondary":null, 
"id":"CONTACT_DETAIL" 
}, 
{ 
"action":"CHAT", 
"label":"Chat", 
"iconid":null, 
"primary":"true", 
"secondary":null, 
"enable":true, 
"id":"CHAT", 
"params":"ZZXS8433,9060852,http:\/\/mediacdn.jeevansathi.com\/866\/17\/17337654-1375139585.jpeg,N" 
}, 
{ 
"action":"SHORTLIST", 
"iconid":"003", 
"label":"Shortlist", 
"params":"&shortlist=false", 
"primary":"true", 
"secondary":null, 
"enable":"true", 
"id":"SHORTLIST" 
} 
], 
"button":null, 
"infomsgiconid":null, 
"infomsglabel":null, 
"infobtnlabel":null, 
"infobtnvalue":null, 
"infobtnaction":null 
} 
}, 
{ 
"profilechecksum":"3c08e787ae61cbfada3232eb5393fa2fi8295748", 
"userloginstatus":"Last Online today", 
"subscription_text":null, 
"subscription_icon":null, 
"age":"22 Years", 
"username":"ZAUY2793", 
"height":"5' 2\" ", 
"occupation":"Student", 
"caste":"Jhijhotiya", 
"income":"No Income", 
"mtongue":"Hindi-MP", 
"edu_level_new":"B.Com", 
"location":"Indore", 
"photo":{ 
"label":null, 
"url":"http:\/\/mediacdn.jeevansathi.com\/713\/7\/14267803-1374952962.jpeg", 
"action":null 
}, 
"size":null, 
"album_count":"2", 
"timetext":null, 
"seen":"Y", 
"religion":"Hindu", 
"gender":"F", 
"featured":null, 
"filter_score":"", 
"filter_reason":"", 
"highlighted":0, 
"verification_seal":null, 
"verification_status":null, 
"mstatus":"Never Married", 
"college":null, 
"pg_college":null, 
"company_name":null, 
"gunascore":null, 
"name_of_user":null, 
"profileid":"8295748", 
"buttonDetails":{ 
"buttons":[ 
{ 
"iconid":"001", 
"label":"Send Interest", 
"action":"INITIATE", 
"value":null, 
"params":null 
}, 
{ 
"iconid":"003", 
"label":"Shortlist", 
"action":"SHORTLIST", 
"value":null, 
"params":"&shortlist=false" 
}, 
{ 
"iconid":"005", 
"label":"Album", 
"action":"ALBUM", 
"value":"2", 
"params":null 
}, 
{ 
"iconid":"007", 
"label":"Contact", 
"action":"CONTACTDETAIL", 
"value":null, 
"params":null 
} 
], 
"button":null, 
"infomsgiconid":null, 
"infomsglabel":null, 
"infobtnlabel":null, 
"infobtnvalue":null, 
"infobtnaction":null 
}, 
"buttonDetailsJSMS":{ 
"buttons":[ 
{ 
"action":"INITIATE", 
"label":"Send Interest", 
"iconid":null, 
"primary":"true", 
"secondary":null, 
"params":"&stype=A", 
"enable":true, 
"id":"INITIATE" 
}, 
{ 
"iconid":null, 
"label":"View Contacts", 
"action":"CONTACT_DETAIL", 
"value":null, 
"params":null, 
"enable":true, 
"primary":"true", 
"secondary":null, 
"id":"CONTACT_DETAIL" 
}, 
{ 
"action":"CHAT", 
"label":"Chat", 
"iconid":null, 
"primary":"true", 
"secondary":null, 
"enable":true, 
"id":"CHAT", 
"params":"ZAUY2793,8295748,http:\/\/mediacdn.jeevansathi.com\/713\/7\/14267803-1374952962.jpeg,N" 
}, 
{ 
"action":"SHORTLIST", 
"iconid":"003", 
"label":"Shortlist", 
"params":"&shortlist=false", 
"primary":"true", 
"secondary":null, 
"enable":"true", 
"id":"SHORTLIST" 
} 
], 
"button":null, 
"infomsgiconid":null, 
"infomsglabel":null, 
"infobtnlabel":null, 
"infobtnvalue":null, 
"infobtnaction":null 
} 
} 
] 

この文字列から"url":"http:\/\/mediacdn.jeevansathi.com\/866\/17\/17337654-1375139585.jpeg"の部分(すなわち、URLの部分のみ)を取得するための正規表現を書きたいです。

私が試した:

# Attempted regex: the pattern has no capture group, so findall() returns each whole match, "url": prefix included.
p = re.compile("\"url\":\".*\.jpeg\"") # no parentheses in this pattern, so nothing is captured separately; .* is greedy
      print p.findall(line) # Python 2 print statement; `line` is defined outside this snippet

しかし、これではURLの部分だけを取得できません。

+0

キャプチャグループを追加してください - `re.compile(r'"url":"(.*\.jpeg)"')` –

+0

動作しません。 https://regex101.com/r/4MvHDj/1 – learner

+0

あなたは*自分の**コード***で `re.findall` を使うのではなく、regex101を使っています。 –

答えて

1

正規表現は間違ったツールのようです。これはJSON文字列です。それを解析して、属性の末尾がjpegの項目を検索する必要があります。

# Parse the JSON text, then collect every photo URL that ends in "jpeg".
# Assumes my_string holds a JSON array of objects like the sample in the question.
data = json.loads(my_string)
results = [
    item['photo']['url']
    for item in data  # iterate the parsed list, not the name being defined
    # "photo" and "url" may be missing or null; fall back to empty values
    # so that .get() and .endswith() are always called on a dict / str.
    if ((item.get('photo') or {}).get('url') or '').endswith('jpeg')
]
+0

正規表現を使用したいのは、私の文字列が純粋なJSONではないからです。先頭にプレフィックス、末尾にサフィックスが付いていることもあります。 – learner

関連する問題