2017-07-04 11 views
1

.xlsxファイルをPythonにインポートしようとしています。何度もGoogleで検索しました。このコードはもともと.csv用に書いたもので、そちらでは動作していましたが、今は.xlsxファイルで動かす必要があります。そこで、調べた内容をつなぎ合わせ、うまくいくことを願って一か八かで試してみました。どんな助けでも大歓迎です! xlrdを使用してxlsxファイルをPythonにインポートする

※コード全体とデータファイルの抜粋を追加しました。.xlsxをインポートして、そのデータに対して処理を実行する必要があります。

import xlrd 

# Tally required/missing data counts per part from the first sheet of a
# workbook and emit them, together with the leading header rows, to
# Report_Card.csv.
# NOTE(review): this is the non-working attempt the question is about; the
# NOTE(review) comments below flag the spots that keep it from running.
workbook = xlrd.open_workbook('GAT_US_PartReview_2017-06-23.xlsx') 

# NOTE(review): get_sheet(0, update_pos=True) — confirm against the xlrd API;
# the documented accessors are sheet_by_index(0) / sheets()[0].
worksheet = workbook.get_sheet(0, update_pos=True) 

header = [] 
report_card = [] 

# Count Stmnts for Required Data 
valid_count = 0 
app_req_count = 0 
intr_req_count = 0 
oe_intr_req_count = 0 
part_img_req_count = 0 
upc_req_count = 0 
unspsc_req_count = 0 
msds_req_count = 0 

# Count Stmts for Missing Data 
missing_app_count = 0 
missing_intr_count = 0 
missing_oe_intr_count = 0 
missing_mpcc_count = 0 
missing_attr_values_count = 0 
missing_part_img_count = 0 
missing_upc_count = 0 
missing_warr_text_count = 0 
missing_warr_pdf_count = 0 
missing_unspsc_count = 0 
missing_msds_count = 0 


# NOTE(review): the sheet object is iterated directly here; per the comment
# thread it is not recognised as iterable.
for row_num, row in enumerate(worksheet): 
    if row_num <= 4: 
     # print(row) # Print out the header 
     # Rows 0-4 are kept as [first column, third column] pairs.
     header.append([row[0], row[2]]) 
    else: 
     # NOTE(review): there is no line continuation between these five lines,
     # so they are not a single 25-name unpacking of `row`.
     hq_line, part_no, part_class, appl_req, appl_count, intr_req, 
     intr_count, oe_intr_req, has_oe_intr, has_attr_editor, 
     has_attr_values, part_img_req, has_part_img, has_mpcc, warr_req, 
     has_warr_txt, has_warr_pdf, msds_req, has_msds, upc_req, has_upc, 
     has_unspsc, attr_count, attrval_count, valid_part = row 

    # NOTE(review): this `if` sits at loop-body level, so it is also evaluated
    # for rows 0-4, before valid_part has been assigned.
    if valid_part == 'YES': 
     valid_count += 1 

     # Required Parts Count 
     if appl_req == 'YES': 
      app_req_count += 1 
     if intr_req == 'YES': 
      intr_req_count += 1 
     if oe_intr_req == 'YES': 
      oe_intr_req_count += 1 
     if part_img_req == 'YES': 
      part_img_req_count += 1 
     if upc_req == 'YES': 
      upc_req_count += 1 
     if msds_req == 'YES': 
      msds_req_count += 1 

     # Missing Data Counts 
     # A requirement counts as missing when it is flagged 'YES' but the
     # matching count/flag column reads '0' or 'NO'.
     if appl_req == 'YES' and appl_count == '0': 
      missing_app_count += 1 
     if intr_req == 'YES' and intr_count == '0': 
      missing_intr_count += 1 
     if oe_intr_req == 'YES' and has_oe_intr == '0': 
      missing_oe_intr_count += 1 
     if has_mpcc == 'NO': 
      missing_mpcc_count += 1 
     if has_attr_values == 'NO': 
      missing_attr_values_count += 1 
     if has_part_img == 'NO': 
      missing_part_img_count += 1 
     if upc_req == 'YES' and has_upc == '0': 
      missing_upc_count += 1 
     if warr_req == 'YES' and has_warr_txt == 'NO': 
      missing_warr_text_count += 1 
     if warr_req == 'YES' and has_warr_pdf == 'NO': 
      missing_warr_pdf_count += 1 
     if has_unspsc == 'NO': 
      missing_unspsc_count += 1 
     if msds_req == 'YES' and has_msds == 'NO': 
      missing_msds_count += 1 

# Statements for Required Counts 
# Each statement is a (label, count formatted with thousands separators) pair.
valid_parts = ('Number of Valid Parts: ', '{:,}'.format(valid_count)) 
application_required = ('Application Records Required: ', 
'{:,}'.format(app_req_count)) 
interchange_required = ('Interchange Records Required: ', 
'{:,}'.format(intr_req_count)) 
oe_interchange_required = ('OE Interchange Records Required: ', 
'{:,}'.format(oe_intr_req_count)) 
# NOTE(review): several trailing comments below wrapped onto the next line,
# which is therefore read as code rather than as a comment.
mpcc = ('MPCC Required: ', '{:,}'.format(valid_count)) # Every valid part 
requires a MPCC 
attributes = ('Attributes Required: ', '{:,}'.format(valid_count)) # Every 
valid part requires attributes 
image_required = ('Image Required: ', '{:,}'.format(part_img_req_count)) 
upc = ('UPC Requited: ', '{:,}'.format(upc_req_count)) 
warranties = ('Warranty Text/PDF Required: ', '{:,}'.format(valid_count)) # 
Require warranty text/pdf on all parts 
unspsc = ('UNSPSC Code Required: ', '{:,}'.format(valid_count)) # Require 
UNSPSC Codes for all parts 
msds = ('MSDS Required: ', '{:,}'.format(msds_req_count)) 

# Statements for Missing Counts 
missing_applications = ('Missing Applications: ', 
'{:,}'.format(missing_app_count)) 
missing_interchange = ('Missing Interchange: ', 
'{:,}'.format(missing_intr_count)) 
missing_oe_interchange = ('Missing OE Interchange: ', 
'{:,}'.format(missing_oe_intr_count)) 
missing_mpcc = ('Missing MPCC: ', '{:,}'.format(missing_mpcc_count)) 
missing_attributes = ('Missing Attributes: ', 
'{:,}'.format(missing_attr_values_count)) 
missing_image = ('Missing Image: ', '{:,}'.format(missing_part_img_count)) 
missing_UPC = ('Missing UPC: ', '{:,}'.format(missing_upc_count)) 
missing_warranty_text = ('Missing Warranty Text: ', 
'{:,}'.format(missing_warr_text_count)) 
missing_warranty_pdf = ('Missing Warranty PDF: ', 
'{:,}'.format(missing_warr_pdf_count)) 
missing_unspsc = ('Missing UNSPSC Code: ', 
'{:,}'.format(missing_unspsc_count)) 
missing_msds = ('Missing MSDS: ', '{:,}'.format(missing_msds_count)) 

# CSV Output 
# The append order below is the row order of the report.
report_card.append(valid_parts) 
report_card.append(application_required) 
report_card.append(interchange_required) 
report_card.append(oe_interchange_required) 
report_card.append(mpcc) 
report_card.append(attributes) 
report_card.append(image_required) 
report_card.append(upc) 
report_card.append(warranties) 
report_card.append(unspsc) 
report_card.append(msds) 
report_card.append(missing_applications) 
report_card.append(missing_interchange) 
report_card.append(missing_oe_interchange) 
report_card.append(missing_mpcc) 
report_card.append(missing_attributes) 
report_card.append(missing_image) 
report_card.append(missing_UPC) 
report_card.append(missing_warranty_text) 
report_card.append(missing_warranty_pdf) 
report_card.append(missing_unspsc) 
report_card.append(missing_msds) 

# Echo the header rows and the report to stdout before writing the file.
for row in header: 
    print(row) 

for x in report_card: 
    print(x) 



# NOTE(review): csv is used below, but no import of it is visible in this
# snippet.
with open('Report_Card.csv', 'w', newline='') as f: 
    writer = csv.writer(f) 
    writer.writerows(header) 
    writer.writerows(report_card) 

CSVファイル(データの抜粋。実際の入力は.CSVファイルではなくExcelワークブックです):

Supplier Line:,,Gates Rubber - Denver (GAT),,,,,,,,,,,,,,,,,,,,,, 
Summary:,,Parts HQ Abbr,,,,,,,,,,,,,,,,,,,,,, 
ACCT No:,,40013586,,,,,,,,,,,,,,,,,,,,,, 
RecCount:,,10221,,,,,,,,,,,,,,,,,,,,,, 
Applicable Date:,,"June 14, 2017 (Wednesday)",,,,,,,,,,,,,,,,,,,,,, 
,,,,,,,,,,,,,,,,,,,,,,,, 
HQ Line,Part No,Part Class,Appl Req,Appl Count ,Intr Req,Intr Count ,OE Intr Req,Has OE Intr,Has Attr Editor, Has Attr Values,Part IMG Req,Has Part IMG,Has MPCC,Warr Req,Has Warr TXT,Has Warr PDF,MSDS Req,Has MSDS,UPC Req,Has UPC,Has UNSPSC,Attr Count ,AttrVal Count ,Valid Part 
GAT,'27210',S,NO,0,YES,1,YES,NO,YES,YES,YES,YES,YES,YES,YES,YES,NO,NO,YES,YES,YES,30,13,YES 
GAT,'27211',O,NO,0,YES,1,YES,NO,YES,YES,YES,YES,YES,YES,YES,YES,NO,NO,YES,YES,YES,30,14,YES 
GAT,'27212',S,NO,0,YES,1,YES,NO,YES,YES,YES,YES,YES,YES,YES,YES,NO,NO,YES,YES,YES,30,13,YES 
GAT,'27213',S,NO,0,YES,1,YES,NO,YES,YES,YES,YES,YES,YES,YES,YES,NO,NO,YES,YES,YES,30,13,YES 
GAT,'27220',S,NO,0,YES,2,YES,NO,YES,YES,YES,YES,YES,YES,YES,YES,NO,NO,YES,YES,YES,35,20,YES 
GAT,'27221',S,NO,0,YES,2,YES,NO,YES,YES,YES,YES,YES,YES,YES,YES,NO,NO,YES,YES,YES,35,20,YES 
+0

.csvをインポートしていたときはこの書き方が使えて動作していましたが、現在はbookを反復可能なオブジェクトとして認識してくれません。 –

+0

質問を編集してXLSXファイルの小さなサンプルを含めると役立ちます。 –

+0

それ以外の場合は、xlsxファイルをCSV形式で保存してください。 – boardrider

答えて

0

book(ワークブック)オブジェクトは複数のシートで構成されています。まずシートを取得し、次にsheet.get_rows()を呼び出して行を取得する必要があります。

次のコードで動作するはずです。ローカルでテスト済みです:

import xlrd 

# Read the five leading header rows of the first worksheet and print them as
# [label, value] pairs.
book = xlrd.open_workbook('GAT_US_PartReview_2017-06-23.xlsx')

header = []
report_card = []

# Count Stmnts for Required Data
valid_count = 0
app_req_count = 0
intr_req_count = 0

# Count Stmts for Missing Data
missing_app_count = 0
missing_intr_count = 0
missing_oe_intr_count = 0

# A workbook is a collection of sheets; the rows live on the first one.
sheet = book.sheets()[0]
for row_num, row in enumerate(sheet.get_rows()):
    if row_num > 4:
        # Only rows 0-4 are header rows, so stop instead of scanning the
        # remainder of the sheet.
        break
    # get_rows() yields Cell objects; keep the plain values (first and third
    # column) so the header can be printed or written out directly.
    header.append([row[0].value, row[2].value])

print(header)
+0

全体コードとデータサンプル –

0

xlrdライブラリを使うあなたのコードには、いくつか修正が必要です(以下は、あなたが提供したデータを前提としています):

import csv

import xlrd

# Summarise a parts-review workbook: keep the five leading label/value header
# rows, count required and missing data over all valid parts, then print the
# result and write it to Report_Card.csv.
#
# NOTE(review): xlrd 2.0 and later read only legacy .xls files, so opening an
# .xlsx here needs xlrd < 2.0 — confirm the installed version.


def _is_zero(value):
    """Return True when a cell holds zero, either as a number or as text.

    Numeric cells read from a workbook are not text, so comparing them
    against the string '0' never matches; converting first handles both.
    Non-numeric content (e.g. 'YES', 'NO', '') is never zero.
    """
    try:
        return float(value) == 0
    except (TypeError, ValueError):
        return False


workbook = xlrd.open_workbook('GAT_US_PartReview_2017-06-23.xlsx')
worksheet = workbook.sheet_by_index(0)

header = []

# Count Stmnts for Required Data
valid_count = 0
app_req_count = 0
intr_req_count = 0
oe_intr_req_count = 0
part_img_req_count = 0
upc_req_count = 0
unspsc_req_count = 0
msds_req_count = 0

# Count Stmts for Missing Data
missing_app_count = 0
missing_intr_count = 0
missing_oe_intr_count = 0
missing_mpcc_count = 0
missing_attr_values_count = 0
missing_part_img_count = 0
missing_upc_count = 0
missing_warr_text_count = 0
missing_warr_pdf_count = 0
missing_unspsc_count = 0
missing_msds_count = 0

for row_num in range(worksheet.nrows):
    row = worksheet.row_values(row_num)

    if row_num <= 4:
        # Header rows: keep the label (first column) and value (third column).
        header.append([row[0], row[2]])
        continue

    # Every remaining row must have exactly these 25 columns. In the sample
    # data the blank separator row and the column-title row also land here;
    # neither has 'YES' in the last column, so both are skipped just below.
    (hq_line, part_no, part_class, appl_req, appl_count, intr_req,
     intr_count, oe_intr_req, has_oe_intr, has_attr_editor,
     has_attr_values, part_img_req, has_part_img, has_mpcc, warr_req,
     has_warr_txt, has_warr_pdf, msds_req, has_msds, upc_req, has_upc,
     has_unspsc, attr_count, attrval_count, valid_part) = row

    if valid_part != 'YES':
        continue
    valid_count += 1

    # Required Parts Count
    if appl_req == 'YES':
        app_req_count += 1
    if intr_req == 'YES':
        intr_req_count += 1
    if oe_intr_req == 'YES':
        oe_intr_req_count += 1
    if part_img_req == 'YES':
        part_img_req_count += 1
    if upc_req == 'YES':
        upc_req_count += 1
    if msds_req == 'YES':
        msds_req_count += 1

    # Missing Data Counts
    if appl_req == 'YES' and _is_zero(appl_count):
        missing_app_count += 1
    if intr_req == 'YES' and _is_zero(intr_count):
        missing_intr_count += 1
    # NOTE(review): the sample rows hold YES/NO in "Has OE Intr" and
    # "Has UPC", so the zero test on those two columns may never match —
    # confirm whether 'NO' should count as missing there.
    if oe_intr_req == 'YES' and _is_zero(has_oe_intr):
        missing_oe_intr_count += 1
    if has_mpcc == 'NO':
        missing_mpcc_count += 1
    if has_attr_values == 'NO':
        missing_attr_values_count += 1
    if has_part_img == 'NO':
        missing_part_img_count += 1
    if upc_req == 'YES' and _is_zero(has_upc):
        missing_upc_count += 1
    if warr_req == 'YES' and has_warr_txt == 'NO':
        missing_warr_text_count += 1
    if warr_req == 'YES' and has_warr_pdf == 'NO':
        missing_warr_pdf_count += 1
    if has_unspsc == 'NO':
        missing_unspsc_count += 1
    if msds_req == 'YES' and has_msds == 'NO':
        missing_msds_count += 1

# Report rows, in output order: (label, count with thousands separators).
# MPCC, attributes, warranty text/PDF and UNSPSC codes are required on every
# valid part, so those rows reuse valid_count.
report_card = [
    (label, '{:,}'.format(count))
    for label, count in (
        # Statements for Required Counts
        ('Number of Valid Parts: ', valid_count),
        ('Application Records Required: ', app_req_count),
        ('Interchange Records Required: ', intr_req_count),
        ('OE Interchange Records Required: ', oe_intr_req_count),
        ('MPCC Required: ', valid_count),
        ('Attributes Required: ', valid_count),
        ('Image Required: ', part_img_req_count),
        # Label spelling kept as-is so the existing report output is unchanged.
        ('UPC Requited: ', upc_req_count),
        ('Warranty Text/PDF Required: ', valid_count),
        ('UNSPSC Code Required: ', valid_count),
        ('MSDS Required: ', msds_req_count),
        # Statements for Missing Counts
        ('Missing Applications: ', missing_app_count),
        ('Missing Interchange: ', missing_intr_count),
        ('Missing OE Interchange: ', missing_oe_intr_count),
        ('Missing MPCC: ', missing_mpcc_count),
        ('Missing Attributes: ', missing_attr_values_count),
        ('Missing Image: ', missing_part_img_count),
        ('Missing UPC: ', missing_upc_count),
        ('Missing Warranty Text: ', missing_warr_text_count),
        ('Missing Warranty PDF: ', missing_warr_pdf_count),
        ('Missing UNSPSC Code: ', missing_unspsc_count),
        ('Missing MSDS: ', missing_msds_count),
    )
]

for row in header:
    print(row)

for x in report_card:
    print(x)

# CSV Output
# newline='' lets the csv module control line endings itself.
with open('Report_Card.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerows(header)
    writer.writerows(report_card)

これを実行すると、次のようなReport_Card.csvファイルが得られます:

Supplier Line: Gates Rubber - Denver (GAT) 
Summary: Parts HQ Abbr 
ACCT No: 40013586 
RecCount: 10221 
Applicable Date: June 14, 2017 (Wednesday) 
Number of Valid Parts: 6 
Application Records Required: 0 
Interchange Records Required: 6 
OE Interchange Records Required: 6 
MPCC Required: 6 
Attributes Required: 6 
Image Required:  6 
UPC Requited: 6 
Warranty Text/PDF Required:  6 
UNSPSC Code Required: 6 
MSDS Required: 0 
Missing Applications: 0 
Missing Interchange: 0 
Missing OE Interchange:  0 
Missing MPCC: 0 
Missing Attributes:  0 
Missing Image: 0 
Missing UPC: 0 
Missing Warranty Text: 0 
Missing Warranty PDF: 0 
Missing UNSPSC Code: 0 
Missing MSDS: 0 

あなたのコードには確かに見直しの余地があります。列ごとに1つずつ、これほど多くの変数を用意するのではなく、辞書のように動作するPythonのCounter()のようなものを使うことを検討してください。リストや辞書を使うと、コードがはるかに簡潔になります。

0

このスクリプトを使うと、Excelのデータテーブルを辞書のリストに変換できます:

import xlrd 

# Convert the first worksheet into a list of dictionaries, one per data row,
# keyed by the values found in the sheet's first row.
workbook = xlrd.open_workbook('GAT_US_PartReview_2017-06-23.xlsx', on_demand=True)
worksheet = workbook.sheet_by_index(0)

# The row where we stock the name of the column
first_row = [worksheet.cell_value(0, col) for col in range(worksheet.ncols)]

# transform the workbook to a list of dictionaries
data = []
for row in range(1, worksheet.nrows):
    data.append({
        first_row[col]: worksheet.cell_value(row, col)
        for col in range(worksheet.ncols)
    })

# print() call form: the statement form `print data` is Python 2 only.
print(data)

Pandasを使うこともできます:

from pandas import * 
# Load the first sheet of the workbook into a DataFrame and print it as a
# dictionary.
xls = ExcelFile('GAT_US_PartReview_2017-06-23.xlsx')
df = xls.parse(xls.sheet_names[0])
# print() call form: the statement form `print df.to_dict()` is Python 2 only.
print(df.to_dict())
関連する問題