0
トリップアドバイザーのスクレイパーを作って、Pythonのスキルを向上させようとしています。現在、このスクレイパーは指定した都市のレストランを収集し、店名とトリップアドバイザー上のURLをExcelファイルに保存することができます。そこで、レストランのメールアドレスと、レストラン自身のサイトへの直接URLも保存する方法について、アドバイスを探しています。（件名：TripAdvisorのスクレイピング ― レストランのURLとメールアドレス）
どなたか、この件についてご意見をいただけますか？ よろしくお願いします。
import requests
from tkinter import *
from bs4 import BeautifulSoup as b
from bs4 import Comment as com
from openpyxl import Workbook
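# Example inputs, taken from a listing URL of the form
# https://www.tripadvisor.co.uk/Restaurants-g186338-London_England.html:
# city_name = 'London_England'
# geo_code = '186338'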
def o_and_t():
    """START-button callback: capture the form values and close the window.

    Reads the text of the three entry widgets held in the module globals
    e_1, e_2 and e_3, publishes it as the module globals nameFile (with
    '.xlsx' appended), geo_code and city_name, and then destroys the root
    window so that its main loop ends.
    """
    global nameFile, geo_code, city_name
    file_stem, geo_code, city_name = e_1.get(), e_2.get(), e_3.get()
    nameFile = '{}.xlsx'.format(file_stem)
    root.destroy()
def gui():
    """Show the input window and block until it is closed.

    Builds a 500x230 black Tk window containing a title banner, three
    labelled entry fields (output file name, geo code, city/country slug)
    and a START button wired to o_and_t().  The root window and the three
    entries are stored in the module globals root, e_1, e_2 and e_3
    because o_and_t() reads them when the button is pressed.

    Returns:
        None, once the Tk main loop has exited.
    """
    global root, e_1, e_2, e_3

    # The family used to be spelled "Helevetica", which is not a real font
    # family, so Tk could not resolve it to the intended Helvetica face.
    font_family = "Helvetica"

    root = Tk()
    root.geometry('500x230')
    root.configure(bg='black')
    root.title('Enter Details')

    def add_label(text, size, y):
        """Place a white-on-black label at the left edge, `y` pixels down."""
        label = Label(root, text=text, font=(font_family, size),
                      bg='black', fg='white')
        label.place(x=0, y=y)

    add_label('\t\tTripAdvisorScraper\n \t\t ~by a1b2t', 14, 0)
    add_label('Please Enter the FileName : ', 11, 60)
    add_label('Please enter the code from the url : ', 11, 90)
    add_label('Please enter the city and country as in url :', 12, 120)

    # One entry per prompt, aligned with its label's y coordinate.
    entries = []
    for y in (60, 90, 120):
        entry = Entry(root)
        entry.place(x=320, y=y)
        entries.append(entry)
    e_1, e_2, e_3 = entries

    start_button = Button(root, text='START', command=o_and_t)
    start_button.place(x=220, y=170)

    # Blocks here until o_and_t() (or the user) destroys the window.
    root.mainloop()
    return None
# Ask the user for the output file name, geo code and city slug.
gui()

# o_and_t() only creates these globals when START is pressed; if the window
# is simply closed they never exist, which used to surface as a NameError.
if not all(name in globals() for name in ('nameFile', 'geo_code', 'city_name')):
    raise SystemExit('No details entered (window closed without pressing START).')

print('\n\n\tStarting Scraper\t\n\n')

# Seconds to wait for a response.  requests applies no timeout by default,
# so a stalled connection would otherwise hang the script forever.
REQUEST_TIMEOUT = 30

main_url = 'https://www.tripadvisor.co.uk/Restaurants-g{}-{}.html'.format(geo_code, city_name)

# Result pages are fetched from the search endpoint below, where "o=a<N>"
# is stepped in multiples of 30 (a30, a60, ... a150), for example:
# https://www.tripadvisor.co.uk/RestaurantSearch?Action=PAGE&geo=186338&ajax=1&itags=10591&sortOrder=relevance&o=a30&availSearchEnabled=false
req_1 = requests.get(main_url, timeout=REQUEST_TIMEOUT)
soup = b(req_1.content, 'html.parser')

page_links = soup.find_all('a', class_="pageNum taLnk")
if page_links:
    # NOTE(review): the "+ 1" is kept from the original.  If
    # data-page-number is 1-based this requests one page past the end
    # (harmless but wasteful) - confirm against the live markup.
    total_pages = int(page_links[-1]['data-page-number']) + 1
else:
    # No pagination links (a single page of results, or an unexpected
    # response): indexing [-1] used to raise IndexError here.
    total_pages = 1
print(total_pages)

RESULTS = []
for offset in range(0, total_pages * 30, 30):
    url = 'https://www.tripadvisor.co.uk/RestaurantSearch?Action=PAGE&geo={}&ajax=1&itags=10591&sortOrder=relevance&o=a{}&availSearchEnabled=false'.format(geo_code, offset)
    req_2 = requests.get(url, timeout=REQUEST_TIMEOUT)
    soup_2 = b(req_2.content, 'html.parser')
    # Each restaurant on the page is an <a class="property_title"> link.
    for t in soup_2.find_all('a', class_="property_title"):
        r_name = t.text.replace('\n', '').replace('\t', '')
        r_url = 'https://www.tripadvisor.com' + t['href']
        # Printed as bytes so non-ASCII names cannot break the console.
        print(str([r_name, r_url]).encode())
        RESULTS.append([r_name, r_url])

# Only create the workbook when something was actually scraped.
if RESULTS:
    wb = Workbook(write_only=True)
    ws = wb.create_sheet()
    for steps_0 in RESULTS:
        ws.append(steps_0)
    wb.save(nameFile)
print(len(RESULTS))
\t
\t