Scrapyパイプライン - CSV出力 - WebページURLの出力

私はパイプラインを使用して、Scrapyプロジェクトからcsvファイルにデータをエクスポートしています。各行には、単一のScrapyアイテムから抽出されたデータと、その抽出日が入ります。（Scrapyパイプライン - CSV出力 - WebページURLの出力）

各行の最後に、項目が抽出されたURLをどのように追加出力できるかを知りたいと思います。

アイテムにurlフィールドを追加するだけで、それをパイプラインで使用することができます。以下が私のパイプラインです：

def open_spider(self, spider):
    """Create the CSV writer for the spider that is starting.

    * ``"LandSalesLinks"``: writes to ``links.csv`` in the current working
      directory, with quoting disabled.
    * ``"LandSalesDetails"``: writes to a date-stamped, semicolon-delimited
      ``details.csv`` inside the Dropbox folder and emits the header row
      straight away.
    * any other spider name: nothing is opened and ``self.file`` is not set.

    Note that ``self.file`` holds the ``csv.writer`` object, not the
    underlying file handle.
    """
    spider_name = spider.name

    if spider_name == "LandSalesLinks":
        self.file = csv.writer(open('links.csv', 'w'), quoting=csv.QUOTE_NONE)
        return

    if spider_name != "LandSalesDetails":
        return

    # File name looks like "2016-8-18 details.csv" (no zero padding).
    today = datetime.now()
    target_dir = u"E:/Dropbox/Αγορά Ακινήτων/Πωλήσεις Γης/Αρχεία προς ένταξη/"
    date_stamp = "-".join(
        str(part) for part in (today.year, today.month, today.day)
    )
    target_name = date_stamp + " " + 'details.csv'

    self.file = csv.writer(
        open(target_dir + target_name, 'w'),
        delimiter=';',
        quoting=csv.QUOTE_NONE,
    )

    # Column titles, in the same order in which process_item fills a row.
    header = [
        "extraction_date",
        "regionA",
        "regionB",
        "regionC",
        "regionD",
        "location_name",
        "category",
        "area",
        "price",
        "city_plan",
        "structure_factor",
        "coverage_factor",
        "facade_length",
        "facade_count",
        "airy",
        "slope",
        "artio",
        "oikodomisimo",
        "me_adia",
        "ktizei",
        "availability",
        "availability_from",
        "antiparoxi",
        "view",
        "dist_from_sea",
        "paling",
        "supplies",
        "drilling",
        "with_building",
        "corner_plot",
        "mesites",
        "epaggelmatiki_xrisi",
        "dimensions",
        "contains",
    ]
    self.file.writerow(header)

def process_item(self, item, spider):
    """Write one CSV row for ``item`` and return the item unchanged.

    For the ``"LandSalesLinks"`` spider the row has a single column: the
    absolute detail-page URL built from the first entry of ``item['link']``.

    For the ``"LandSalesDetails"`` spider the row contains, in order:
    the extraction date (``day/month/year``), the region split on ``" > "``
    into exactly four columns (missing levels are left blank), and then the
    first value of each detail field, UTF-8 encoded and stripped. A field
    whose value list is empty produces an empty column.

    Items from any other spider are passed through untouched. Scrapy
    expects ``process_item`` to return the item (or raise ``DropItem``),
    so this method never returns ``None``.

    Raises ``KeyError`` if the item lacks one of the expected fields.
    """
    if spider.name == "LandSalesLinks":
        # First and only column.
        self.file.writerow(
            ["http://www.xe.gr" + str(item['link'][0]) + "?mode=spec"]
        )
        return item

    if spider.name != "LandSalesDetails":
        # Not handled by this pipeline: hand the item on as-is.
        return item

    def first_or_blank(values):
        # Extracted fields arrive as lists; only the first entry is used.
        if values:
            return values[0].encode('utf-8').strip()
        return ""

    # Detail columns, in the order of the header written by open_spider.
    detail_fields = (
        'location_name',
        'category',
        'area',
        'price',
        'city_plan',
        'structure_factor',
        'coverage_factor',
        'facade_length',
        'facade_count',
        'airy',
        'slope',
        'artio',
        'oikodomisimo',
        'me_adia',
        'ktizei',
        'availability',
        'availability_from',
        'antiparoxi',
        'view',
        'dist_from_sea',
        'paling',
        'supplies',
        'drilling',
        'with_building',
        'corner_plot',
        'mesites',
        'epaggelmatiki_xrisi',
        'dimensions',
        'contains',
    )

    now = datetime.now()
    row = [str(now.day) + "/" + str(now.month) + "/" + str(now.year)]

    # Split "A > B > C > D" into the regionA..regionD columns. An empty
    # region list yields four blank columns instead of an IndexError.
    region_values = item['region']
    region_parts = region_values[0].split(" > ") if region_values else []
    for level in range(4):
        if level < len(region_parts):
            row.append(region_parts[level].strip().encode('utf-8'))
        else:
            row.append("")

    for field_name in detail_fields:
        row.append(first_or_blank(item[field_name]))

    self.file.writerow(row)
    return item

出典

2016-08-18 errorLogger

`append2` の代わりに `append(value or "")` を使うことができます。これは `bool(value)` が False の場合（`[]` の場合も含む）に `""` を追加します。 – Granitosaurus

スパイダー側では、次のコードを参考にできます：

def parse(self, response):
    """Yield a single ``MyItem`` whose ``url`` field is the response URL."""
    scraped = MyItem()
    # Record the URL of the response this item was built from.
    scraped['url'] = response.url
    yield scraped

そしてパイプライン側では：

def process_item(self, item, spider):
    """Put the item's ``url`` into a row, then remove it from the item.

    Returns the same item object, now without its ``url`` key. Raises
    ``KeyError`` if the item has no ``url``.
    """
    source_url = item['url']
    row = [source_url]
    # Drop the field again if it should not remain on the item.
    del item['url']
    return item

出典

2016-08-18 09:58:57 Granitosaurus

ありがとうございます！それはうまく動作します:) – errorLogger

@ΙωάννηςBουβάκηςΜανουσάκης 素晴らしい！問題が解決したのであれば、この回答を承認していただけますか？ :) – Granitosaurus

もちろんです！残念ながら、私はまだ賛成票を投じることができません :/ – errorLogger

Scrapyパイプライン - CSV出力 - WebページURLの出力

答えて

関連する問題