-1
以下のコードでは、resultsBlockFooter 内の「Going Allowance」を除き、必要なデータをすべてスクレイピングで取得できます。ソース内のほとんどのデータはリスト(li)要素に入っていますが、Going Allowance は span タグで囲まれています。さまざまな書き方を試してみましたが、どうしてもこの値だけ抽出できません。BS(BeautifulSoup)では span タグ内のデータをうまく取得できません。
import csv
from bs4 import BeautifulSoup
import requests
# Scrape race results from a results page and append one CSV row per runner
# to abc.csv.  Written for Python 3 (str/bytes handling, newline="" for csv).

# NOTE(review): this URL looks truncated -- the path and the query-parameter
# name in front of "=156432" appear to be missing.  Only the missing closing
# quote has been restored; confirm the real address before running.
URL = "http://www.sportinglife.com=156432"
OUTPUT_PATH = "abc.csv"
GOING_LABEL = "going allowance"

# Column order of the CSV.  Every key stored in a row dict must be listed
# here, otherwise csv.DictWriter raises ValueError when the row is written.
FIELDNAMES = [
    "track", "date", "trap", "fin", "greyhound", "datetime", "sp", "grade",
    "distance", "prizes", "timeSec", "timeDistance", "trainer", "comment",
    "first essential", "going Allowance",
]
HEADER_FIELDS = ("track", "date", "datetime", "grade", "distance", "prizes")
LINE1_FIELDS = ("fin", "greyhound", "trap", "sp", "timeSec", "timeDistance")


def cell_text(parent, tag, css_class):
    """Return the stripped text of the first matching child, or "" if absent."""
    node = parent.find(tag, class_=css_class)
    return node.get_text(strip=True) if node is not None else ""


def header_text(header, css_class):
    """Return a header field as ASCII-only text without surrounding "|"."""
    text = cell_text(header, "div", css_class)
    # Drop non-ASCII characters, then decode back to str so that strip("|")
    # operates on text; calling bytes.strip("|") raises TypeError on Python 3.
    return text.encode("ascii", "ignore").decode("ascii").strip("|")


def going_allowance(footer):
    """Return the going-allowance value from a results footer, or "".

    The footer markup is not visible from this script, so two layouts are
    handled: the value inside the same span element as the "Going Allowance"
    label, or in the node that directly follows that span.
    """
    if footer is None:
        return ""
    for span in footer.find_all("span"):
        text = span.get_text(" ", strip=True)
        start = text.lower().find(GOING_LABEL)
        if start == -1:
            continue
        value = text[start + len(GOING_LABEL):].strip(" :")
        sibling = span.next_sibling
        if not value and sibling is not None:
            # The sibling is either a text node (a str subclass) or a tag.
            following = sibling if isinstance(sibling, str) else sibling.get_text()
            value = following.strip().strip(":").strip()
        return value
    return ""


response = requests.get(URL, timeout=30)
response.raise_for_status()  # fail loudly instead of parsing an error page
soup = BeautifulSoup(response.text, "lxml")

rows = []
for header in soup.find_all("div", class_="resultsBlockHeader"):
    # Values shared by every runner of this race.
    race = {name: header_text(header, name) for name in HEADER_FIELDS}
    footer = header.find_next_sibling("div", class_="resultsBlockFooter")
    race["going Allowance"] = going_allowance(footer)

    block = header.find_next_sibling("div", class_="resultsBlock")
    if block is None:
        continue

    # One "line1" list per runner holds the main result columns.
    details = [
        {name: cell_text(line, "li", name) for name in LINE1_FIELDS}
        for line in block.find_all("ul", class_="line1")
    ]
    # "line2" / "line3" lists are matched to runners by position; zip() stops
    # at the shorter sequence, so a missing line leaves the column blank
    # instead of raising IndexError.
    for detail, line in zip(details, block.find_all("ul", class_="line2")):
        detail["trainer"] = cell_text(line, "li", "trainer")
        detail["first essential"] = cell_text(line, "li", "first essential")
    for detail, line in zip(details, block.find_all("ul", class_="line3")):
        detail["comment"] = cell_text(line, "li", "comment")

    for detail in details:
        detail.update(race)
        rows.append(detail)

# newline="" lets the csv module control line endings (avoids blank lines on
# Windows).  Rows are appended and no header row is written.
with open(OUTPUT_PATH, "a", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, FIELDNAMES)
    writer.writerows(rows)