
bs4 parser keeps an incomplete list

I am writing some scraping code. The first part collects everything it should (90 values), but the second part only ever ends up with an incomplete list of prices (30 values). The loop does not seem to be working for that segment.
How do I need to change this code so that it keeps the full list?
Thank you!

import re
import requests
from bs4 import BeautifulSoup

url = "http://www.watcheszon.com/casio-g-shock-c-19_20/?pg=store"

DATA_CONTAINER = list()
DATA = list()

def collectData():
    global DATA_CONTAINER
    global DATA

    for i in range(1, 5):
        newUrl = url + "&sort=20a&page=" + str(i)
        r = requests.get(newUrl)
        soup = BeautifulSoup(r.content, "lxml")

        # collect the product names
        g_data_odd = soup.find_all("td", {"class": "productListing-data"})
        for item in g_data_odd:
            t = item.find_all("div", {"class": "product_name"})
            for name in t:
                piece = name.find('a').text
                DATA_CONTAINER.append(piece)

        # collect the prices from the element texts using a regex
        spans = soup.find_all('span', {"class": "productSalePrice"})
        lines = [span.get_text() for span in spans]
        found_prices = []
        for line in lines:
            m = re.search(r'[USD]+\d{2,3}.\d{2}', line)
            if m:
                found_prices.append(m.group(0))

        DATA = list(zip(DATA_CONTAINER, found_prices))
        print(DATA)

def serializeToCSV(fileName):
    with open(fileName, "w", encoding="utf-8") as fd:
        for item in DATA:
            fd.write(' '.join(item) + "\n")

collectData()
print(len(DATA))
serializeToCSV('csv.csv')

1 Answer

Try this code, with two changes to the original (explained below):

  • call the serializeToCSV method inside the loop (line 17)
  • use the "a" (append) option when opening the file at line 53
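
The underlying problem is that zip stops at the shorter of its two inputs: DATA_CONTAINER keeps growing across all pages, while found_prices only ever holds the current page's prices, so most of the names are silently dropped. A minimal illustration of the truncation (the counts 90 and 30 are simply the ones from the question):

    names = ["name"] * 90    # names accumulated across all pages
    prices = ["price"] * 30  # prices from the current page only
    print(len(list(zip(names, prices))))  # 30 -- zip truncates to the shorter input

Pairing each page's names with that same page's prices, and appending the result to the file as each page is scraped, keeps everything aligned. With that in mind, the adjusted code: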

    import re
    import requests
    from bs4 import BeautifulSoup

    url = "http://www.watcheszon.com/casio-g-shock-c-19_20/?pg=store"

    DATA_CONTAINER = list()
    DATA = list()

    def serializeToCSV(fileName):
        # "a" appends, so the rows written for earlier pages are kept
        with open(fileName, "a", encoding="utf-8") as fd:
            for item in DATA:
                fd.write(' '.join(item) + "\n")

    def collectData():
        global DATA_CONTAINER
        global DATA

        for i in range(1, 5):
            newUrl = url + "&sort=20a&page=" + str(i)
            r = requests.get(newUrl)
            soup = BeautifulSoup(r.content, "lxml")

            # names from this page only, so they line up with this page's prices
            page_names = []
            g_data_odd = soup.find_all("td", {"class": "productListing-data"})
            for item in g_data_odd:
                t = item.find_all("div", {"class": "product_name"})
                for name in t:
                    piece = name.find('a').text
                    page_names.append(piece)
                    DATA_CONTAINER.append(piece)

            # collect the prices; 'USD' is matched literally and the dot is escaped
            spans = soup.find_all('span', {"class": "productSalePrice"})
            lines = [span.get_text() for span in spans]
            found_prices = []
            for line in lines:
                m = re.search(r'USD\d{2,3}\.\d{2}', line)
                if m:
                    found_prices.append(m.group(0))

            # pair this page's names with this page's prices, then append them to the file
            DATA = list(zip(page_names, found_prices))
            print(DATA)
            print(len(DATA))
            serializeToCSV('csv.csv')

    collectData()
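
As a side note, joining fields with spaces becomes ambiguous once a product name itself contains spaces; the csv module in the standard library quotes fields properly. A small sketch of an alternative writer, assuming the rows are (name, price) tuples as above:

    import csv

    def serializeToCSV(fileName, rows):
        # "a" keeps rows from earlier pages; newline="" avoids blank lines on Windows
        with open(fileName, "a", newline="", encoding="utf-8") as fd:
            csv.writer(fd).writerows(rows)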
    