0
Chrome Xpath Helperのhttp://tieba.baidu.com/f?kw=dota2&fr=indexに正しいリンクが表示されます。Img Scrapyに正しいxpathの結果がありません
> E:\ladder\tieba\tieba\spiders\tiebaSpiber.py:11: ScrapyDeprecationWarning: tieba.spiders.tiebaSpiber.tiebaSpider inherits from deprecated class scrapy.spiders.BaseSpider, please inherit from scrapy.spiders.Spider. (warning only on first subclass, there may be others)
class tiebaSpider(BaseSpider):
img_url:
['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '']
スパイダーコード: しかしscrapyのクモでは、このログのように何の結果を持っていない
class tiebaSpider(BaseSpider):
name = "tiebaSpider"
allowed_domains = ["tieba.baidu.com"]
download_delay = 1
start_urls = ["http://tieba.baidu.com/f?ie=utf-8&kw=dota2", ]
rules = (
Rule(LinkExtractor(allow=(r'http://tieba.baidu.com/f?kw=dota2&ie=utf-8&pn=')), callback='parse_tieba',
follow=True),
)
def parse_tieba(self, response):
self.log("Fetch Dota2 Tieba Page:%s" % response.url)
sel = Selector(response)
rep_num = sel.xpath('//span[@class="threadlist_rep_num center_text"]/text()').extract()
title = sel.xpath('//div[@class="threadlist_title pull_left j_th_tit "]/a/text()').extract()
author = sel.xpath('//span[@class="frs-author-name-wrap"]/a/text()').extract()
img_url = sel.xpath('//div[@class="threadlist_text pull_left"]//div[@class="small_wrap j_small_wrap"]//a[@class="thumbnail vpic_wrap"]/img/@src').extract()
item = TiebaItem()
item['rep_num'] = [n for n in rep_num]
item['title'] = [n for n in title]
item['author'] = [n for n in author]
item['img_url'] = [n for n in img_url]
print("img_url:\n")
print(img_url)
yield item