2011-12-26 2 views
1

私は、次のコードを実行すると、私はこのエラーを取得しておいてください。Pythonプログラムでこのタイトルの一致エラーが発生するのはなぜですか?

Traceback (most recent call last): 
    File "C:\Users\Robert\Documents\j-a-c-o-b\newlc.py", line 94, in <module> 
    main() 
    File "C:\Users\Robert\Documents\j-a-c-o-b\newlc.py", line 71, in main 
    for final_url in pool.imap(handle_listing, listings): 
    File "C:\Python27\lib\site-packages\eventlet-0.9.16-py2.7.egg\eventlet\greenpool.py", line 232, in next 
    val = self.waiters.get().wait() 
    File "C:\Python27\lib\site-packages\eventlet-0.9.16-py2.7.egg\eventlet\greenthread.py", line 166, in wait 
    return self._exit_event.wait() 
    File "C:\Python27\lib\site-packages\eventlet-0.9.16-py2.7.egg\eventlet\event.py", line 120, in wait 
    current.throw(*self._exc) 
    File "C:\Python27\lib\site-packages\eventlet-0.9.16-py2.7.egg\eventlet\greenthread.py", line 192, in main 
    result = function(*args, **kwargs) 
    File "C:\Users\Robert\Documents\j-a-c-o-b\newlc.py", line 35, in handle_listing 
    title, = TITLE_MATCH.match(listing_title).groups() 
AttributeError: 'NoneType' object has no attribute 'groups' 

間違っていますか?

タイトルの一致と関係がありますが、修正方法はわかりません。

本当にありがとうございます。

ありがとうございます!

from gzip import GzipFile 
from cStringIO import StringIO 
import re 
import webbrowser 
import time 
from difflib import SequenceMatcher 
import os 
import sys 
from BeautifulSoup import BeautifulSoup 
import eventlet 
from eventlet.green import urllib2 
import urllib2 
import urllib 

def download(url): 
    print "Downloading:", url 
    s = urllib2.urlopen(url).read() 
    if s[:2] == '\x1f\x8b': 
     ifh = GzipFile(mode='rb', fileobj=StringIO(s)) 
     s = ifh.read() 
    print "Downloaded: ", url 
    return s 

def replace_chars(text, replacements): 
    return ''.join(replacements.get(x,x) for x in text) 

def handle_listing(listing_url): 
    listing_document = BeautifulSoup(download(listing_url)) 

    # ignore pages that link to yellowpages 
    if not listing_document.find("a", href=re.compile(re.escape("http://www.yellowpages.com/") + ".*")): 
     listing_title = listing_document.title.text 
     reps = {' ':'-', ',':'', '\'':'', '[':'', ']':''} 
     title, = TITLE_MATCH.match(listing_title).groups() 
     address, = ADDRESS_MATCH.match(listing_title).groups() 

     yellow_page_url = "http://www.yellowpages.com/%s/%s?order=distance" % (
      replace_chars(address, reps), 
      replace_chars(title, reps), 
     ) 

     yellow_page = BeautifulSoup(download(yellow_page_url)) 

     page_url = yellow_page.find("h3", {"class" : "business-name fn org"}) 
     if page_url: 
      page_url = page_url.a["href"] 

      business_name = title[:title.index(",")] 

      page = BeautifulSoup(download(page_url)) 
      yellow_page_address = page.find("span", {"class" : "street-address"}) 
      if yellow_page_address: 

       if SequenceMatcher(None, address, yellow_page_address.text).ratio() >= 0.5: 
        pid, = re.search(r'p(\d{5,20})\.jsp', listing_url).groups(0) 
        page_escaped = replace_chars(page_url, {':':'%3A', '/':'%2F', '?':'%3F', '=':'%3D'}) 

        final_url = "http://www.locationary.com/access/proxy.jsp?ACTION_TOKEN=proxy_jsp$JspView$SaveAction&inPlaceID=%s&xxx_c_1_f_987=%s" % (
          pid, page_escaped) 
        return final_url 


def main(): 

    pool = eventlet.GreenPool() 
    listings_document = BeautifulSoup(download(START_URL)) 
    listings = listings_document.findAll("a", href = LOCATION_LISTING) 
    listings = [listing['href'] for listing in listings] 

    for final_url in pool.imap(handle_listing, listings): 
     print final_url 


     if str(final_url) is not None: 

      url = str(final_url) 

      req = urllib2.Request(url) 
      response = urllib2.urlopen(req) 
      page = response.read() 
      time.sleep(2) 



for a in range(2,3): 

    START_URL = 'http://www.locationary.com/place/en/US/New_Jersey/Randolph-page' + str(a) + '/?ACTION_TOKEN=NumericAction' 
    TITLE_MATCH = re.compile(r'(.*) \(\d{1,10}.{1,100}\)$') 
    ADDRESS_MATCH = re.compile(r'.{1,100}\((.*), .{4,14}, United States\)$') 
    LOCATION_LISTING = re.compile(r'http://www\.locationary\.com/place/en/US/.{1,50}/.{1,50}/.{1,100}\.jsp') 

    if __name__ == '__main__': 
     main() 

答えて

4

あなたのエラーからの引用:

title, = TITLE_MATCH.match(listing_title).groups()
AttributeError: 'NoneType' object has no attribute 'groups'

TITLE_MATCH.match(listing_title)戻りNone、あなたが.groups()を呼び出すことはできませんので。

+0

は、だから私は入れてしまうでしょう: 'TITLE_MATCHがNoneでない場合:' – jacob501

+1

いいえ。あなたがチェックしたいと思います実際の正規表現自体ではなく 'match()'の出力です。 –

+1

'if TITLE_MATCH.match(listing_title)not None:'(少しだけ明確にする –

2

re.matchと一致するものが見つからない場合は、Noneを返します。 None.groups()と電話をかけることはできませんので、最初に一致を確認する必要があります。これを行うには:

変更この:これに

title, = TITLE_MATCH.match(listing_title).groups() 
address, = ADDRESS_MATCH.match(listing_title).groups() 

titleMatch = TITLE_MATCH.match(listing_title) 
if titleMatch: 
    title, = titleMatch.groups() 
else: 
    # handle it 

addressMatch = ADDRESS_MATCH.match(listing_title) 
if addressMatch: 
    address, = addressMatch.groups() 
else: 
    # handle it 
関連する問題