0
ハイキングのリンクを取得するために簡単なスパイダーを書きました。Scrapyでスパイダーを構築しましたが、リンクをたどってくれません。
from scrapy.spiders import Spider
from scrapy.selector import Selector
from oregon_hikes_scrapper.items import HikeLinkItem
# Query-string fragments ("from=...&to=...") that are appended to the
# Special:AllPages URL to build the spider's start URLs.
# NOTE(review): the first entry has a `"` right after `from=` and spells
# "Hidden_Meadows" differently from the second entry ("Hidden_Meadow");
# confirm both against the live site before relying on them.
ENDPOINTS = [
    'from="%27%27Peter_Iredale%27%27&to=Bonney_Meadows-Hidden_Meadows_Trail_Junction',
    'from=Bonney_Meadows-Hidden_Meadow_Trail_Junction&to=Clatsop_Loop_Hike',
]
class OrHikeSpider(Spider):
    """Collect hike titles and links from the Oregon Hikers field guide.

    One start URL is built per entry in ``ENDPOINTS``; each points at a
    ``Special:AllPages`` listing. ``parse`` yields one ``HikeLinkItem`` for
    every anchor matched on a listing page.
    """

    name = 'or_hikes'

    # Scrapy expects a list of domain names here, not a bare string.
    allowed_domains = ["oregonhikers.org"]

    # The attribute must be spelled ``start_urls``: Scrapy does not read any
    # other name, so a misspelling leaves the spider with nothing to request.
    # The query string is introduced with "?"; joining it with "&" would make
    # the "from=...&to=..." text part of the URL path instead of the query.
    start_urls = [
        "http://www.oregonhikers.org/field_guide/Special:AllPages?" + endpoint
        for endpoint in ENDPOINTS
    ]

    def parse(self, response):
        """Yield a ``HikeLinkItem`` for each hike anchor found in ``response``.

        Args:
            response: The downloaded listing page passed in by Scrapy.

        Yields:
            HikeLinkItem: ``hike`` holds the anchor's ``title`` attribute and
            ``link`` its ``href`` attribute, each as returned by
            ``.extract()``.
        """
        # Run the XPath against the downloaded response. Calling
        # ``Selector.xpath`` on the class itself has no document to search
        # and fails with a TypeError.
        # NOTE(review): browsers insert <tbody> when rendering tables, while
        # the raw HTML Scrapy receives may not contain it. If this matches
        # nothing, try the expression without the "/tbody" step -- confirm
        # against the actual page source.
        anchors = response.xpath(
            '//*[@id="mw-content-text"]/table[2]/tbody/tr[1]/td[1]/div/a'
        )
        for anchor in anchors:
            item = HikeLinkItem()
            item['hike'] = anchor.xpath('@title').extract()
            item['link'] = anchor.xpath('@href').extract()
            yield item