解決できました。JavaScript の window.open を実行すると、ページの読み込み完了を待たずに（非同期で）新しいウィンドウを開くことができます。
この仕組みを使えば、複数のページを同時に読み込めます。すべての読み込みを開始したあとで、独自の待機ロジックを実装し、各ページの読み込みが完了した時点を判定します。
def download_parallel(urls, driver, process_page):
'''Download pages in parallel using Selenium
urls: a list of urls to download
driver: The selenium webdriver
process_page: a function that takes in the url and the driver to process
the page as you see fit.
'''
start_handle = driver.current_window_handle
handles = []
# Step 1: Initiate all of the page downloads
for i, url in enumerate(urls):
driver.switch_to.window(driver.window_handles[i])
old_handles = driver.window_handles
# Initiate a page get without waiting for onload
driver.execute_script('window.open("%s", "para_win_%02d", '
'"height = 450, width = 800, menubar=yes,scrollbars=yes,toolbar=yes,'
'location=no,resizable=yes");'%(url, i))
# We have to determine the handle for the new window.
for h in driver.window_handles:
if h not in old_handles:
handles.append(h)
break
# Step 2: Wait for the pages to download.
for i, url in enumerate(urls):
driver.switch_to.window(handles[i])
# Wait for some css to load. There are other waiting functions you can use.
WebDriverWait(driver, 10).until(
EC.visibility_of_element_located((By.CSS_SELECTOR, "body #my_main"))
)
# Do more processing of the page here
process_page(url, driver)
# Close the window now that we're done with it.
driver.close()
# Go back to the window we started in
driver.switch_to.window(start_handle)