1
私のスクレイピングコードはローカルコンピュータ(Windows 8)では完全に動作しますが、Digital Oceanのドロップレット(Ubuntu 16.04)では失敗し、ステータス行に関するエラーまたはURLErrorが発生します。この問題についてはすでに数時間かけてGoogleで調べました。(タイトル: Selenium + PhantomJSでURLErrorが発生する — Windowsでは動作するがUbuntu 16.04では失敗する)
セットアップ:
PhantomJS 2.1.1
Selenium 2.53.6
Python 2.7
class Elitebet:
    """Scrape leagues, fixtures and odds from the Elitebet Kenya coupon pages.

    The coupon is rendered in a PhantomJS browser driven by Selenium and each
    rendered page is parsed with BeautifulSoup.
    """

    # Market label (first cell of a market row) -> names given to the odds
    # found in the cells starting at index 3 of that row.
    _MARKETS = {
        "Win-Draw-Win": ("home_odds", "draw_odds", "away_odds"),
        "Handicap (0:1)": ("handicap01_1", "handicap01_draw", "handicap01_2"),
        "Double Chance": ("oneordraw", "oneortwo", "drawortwo"),
        "Asian (Draw No Bet)": ("asian1", "asian2"),
        "Goal Under/Over (2.5)": ("under25", "over25"),
        "1st Half Goal Under/Over (1.5)": ("underfh15", "overfh15"),
        "Goal - No Goal": ("goal", "no_goal"),
        "Odd - Even Goal": ("odd", "even"),
        "Handicap (1:0)": ("handicap10_1", "handicap10_draw", "handicap10_2"),
    }

    # Index of the first odds cell in a market row.
    _FIRST_ODDS_CELL = 3

    def __init__(self):
        """Record the start time and launch one PhantomJS session.

        Both used to be class attributes, which started the browser as a side
        effect of defining the class and shared it between all instances.
        """
        self.t1 = time.time()
        self.driver = webdriver.PhantomJS()

    def controller(self):
        """Parse the first coupon page, then click through the 'Next' pages.

        Prints the number of seconds elapsed since the instance was created.
        The browser is shut down when this method exits, whether it succeeded
        or raised, so the phantomjs process is not left running; create a new
        ``Elitebet`` for another run.
        """
        try:
            self.driver.get("http://www.elitebetkenya.com/coupon.php?d")
            self._wait_for_page()
            soup = self._page_soup()
            page_number = self.number_of_pages(soup)
            self.eliteparser(soup)
            # NOTE(review): "- 10" leaves the last ten pages unvisited; kept
            # exactly as in the original -- confirm this is intended.
            for _ in range(0, page_number - 10):
                next_button = self.driver.find_element_by_xpath(
                    "//input[@value='Next']")
                next_button.click()
                self._wait_for_page()
                # Short random pause between page loads.
                time.sleep(randint(1, 2))
                self.eliteparser(self._page_soup())
            t2 = time.time() - self.t1
            print(t2)
        finally:
            self.driver.quit()

    def _wait_for_page(self):
        """Block (up to 10 s) until the element with id 'page' is present."""
        WebDriverWait(self.driver, 10).until(
            EC.presence_of_element_located((By.ID, "page")))

    def _page_soup(self):
        """Return the browser's current page source parsed by BeautifulSoup."""
        return BeautifulSoup(
            self.driver.page_source.encode('utf-8'), "html.parser")

    def number_of_pages(self, x):
        """Return the first integer found in the pager text.

        ``x`` is the parsed page; the text is taken from the third child of
        the ``div`` with class ``pgLnx``.
        """
        numbers = x.find("div", {"class": "pgLnx"}).contents[2]
        return int(re.findall(r'\d+', numbers)[0])

    def eliteparser(self, x):
        """Print and return every fixture found on the parsed page ``x``.

        For each fixture the league text and "<home> vs <away>" are printed.

        Returns a list with one dict per fixture holding ``league``, ``home``,
        ``away``, ``fixture_time`` and one entry per odds name listed in
        ``_MARKETS`` (``None`` when that market is absent for the fixture).
        An empty list is returned when the page has no ``tbody``.
        """
        fixtures = []
        tbody = x.find("tbody")
        if tbody is None:
            return fixtures

        # league level
        for league_row in tbody.findAll("tr", {"class": "league"}):
            league = league_row.get_text()
            # fixture level: rows up to the next league row belong to this one
            for row in league_row.find_next_siblings("tr"):
                row_classes = row.get("class", [])
                if "league" in row_classes:
                    break
                if "fixture" not in row_classes:
                    continue
                record = {
                    "league": league,
                    "home": row.find(
                        "span", {"class": "home uc"}).get_text(strip=True),
                    "away": row.find(
                        "span", {"class": "away uc"}).get_text(strip=True),
                    "fixture_time": row.br.get_text().strip(),
                }
                record.update(self._collect_odds(row))
                fixtures.append(record)
                print(league)
                print("{} vs {}".format(record["home"], record["away"]))
        return fixtures

    def _collect_odds(self, fixture_row):
        """Return the odds from the market rows that follow ``fixture_row``.

        Every known odds name starts as ``None`` for each fixture, so values
        never carry over from the previous fixture. Scanning stops at the
        next fixture or league row. Rows with too few cells or an unknown
        label are skipped.
        """
        odds = {}
        for names in self._MARKETS.values():
            odds.update(dict.fromkeys(names))

        for market_row in fixture_row.find_next_siblings("tr"):
            row_classes = market_row.get("class", [])
            if "fixture" in row_classes or "league" in row_classes:
                break
            cells = market_row.findAll("td")
            if not cells:
                continue
            # Strip the label for every market, not only the first one.
            names = self._MARKETS.get(cells[0].get_text().strip())
            if names is None:
                continue
            if len(cells) < self._FIRST_ODDS_CELL + len(names):
                continue
            for offset, name in enumerate(names):
                odds[name] = cells[self._FIRST_ODDS_CELL + offset].get_text()
        return odds
if __name__ == "__main__":
    # Run the scrape only when this file is executed as a script, so that
    # importing the module does not start a browser session.
    elite = Elitebet()
    elite.controller()
エラーメッセージは次のとおりです。
File "elitebet.py", line 147, in <module>
elite.controller()
File "elitebet.py", line 45, in controller
page_click.click()
File "/usr/local/lib/python2.7/dist-packages/selenium/webdriver/remote/webelement.py", line 72, in click
self._execute(Command.CLICK_ELEMENT)
File "/usr/local/lib/python2.7/dist-packages/selenium/webdriver/remote/webelement.py", line 461, in _execute
return self._parent.execute(command, params)
File "/usr/local/lib/python2.7/dist-packages/selenium/webdriver/remote/webdriver.py", line 234, in execute
response = self.command_executor.execute(driver_command, params)
File "/usr/local/lib/python2.7/dist-packages/selenium/webdriver/remote/remote_connection.py", line 401, in execute
return self._request(command_info[0], url, body=data)
File "/usr/local/lib/python2.7/dist-packages/selenium/webdriver/remote/remote_connection.py", line 471, in _request
resp = opener.open(request, timeout=self._timeout)
File "/usr/lib/python2.7/urllib2.py", line 429, in open
response = self._open(req, data)
File "/usr/lib/python2.7/urllib2.py", line 447, in _open
'_open', req)
File "/usr/lib/python2.7/urllib2.py", line 407, in _call_chain
result = func(*args)
File "/usr/lib/python2.7/urllib2.py", line 1228, in http_open
return self.do_open(httplib.HTTPConnection, req)
File "/usr/lib/python2.7/urllib2.py", line 1198, in do_open
raise URLError(err)
urllib2.URLError: <urlopen error [Errno 111] Connection refused>