2016-07-19 3 views
1

TimesのAPIを使用して「経済」という単語が含まれている2016年のNY Timesの記事をすべて取り込もうとしています。ValueError:dictにはif文でもフィールド名に含まれないフィールドが含まれています

はとValueErrorが

from nytimesarticle import articleAPI 
api = articleAPI('0282db2f333f4f4095edd19f0660c978') 

articles = api.search(q = 'economy', 
fq = {'headline':'economy', 'source':['Reuters','AP', 'The New 
YorkTimes']}, 
begin_date = 20151231) 

def parse_articles(articles): 

news = [] 
for i in articles['response']['docs']: 
    dic = {} 
    dic['id'] = i['_id'] 
if i['abstract'] is not None: 
     dic['abstract'] = i['abstract'].encode("utf8") 
    dic['headline'] = i['headline']['main'].encode("utf8") 
    dic['desk'] = i['news_desk'] 
    dic['date'] = i['pub_date'][0:10] # cutting time of day. 
    dic['section'] = i['section_name'] 
    if i['snippet'] is not None: 
     dic['snippet'] = i['snippet'].encode("utf8") 
    dic['source'] = i['source'] 
    dic['type'] = i['type_of_material'] 
    dic['url'] = i['web_url'] 
    dic['word_count'] = i['word_count'] 

    locations = [] 
    for x in range(0,len(i['keywords'])): 
     if 'glocations' in i['keywords'][x]['name']: 
      locations.append(i['keywords'][x]['value']) 
    dic['locations'] = locations 

    subjects = [] 
    for x in range(0,len(i['keywords'])): 
     if 'subject' in i['keywords'][x]['name']: 
      subjects.append(i['keywords'][x]['value']) 
    dic['subjects'] = subjects 
    news.append(dic) 
return(news) 

def get_articles(date,query): 

all_articles = [] 
for i in range(0,100): 
    articles = api.search(q = query, 
      fq = {'source':['Reuters','AP', 'The New York Times']}, 
      begin_date = 20151231, 
      end_date = 20160715, 
      sort='oldest', 
      page = str(i)) 
    articles = parse_articles(articles) 
    all_articles = all_articles + articles 
return(all_articles) 

econ_all = [] 
for i in range(2015,2016): 
print 'Processing' + str(i) + '...' 
econ_year = get_articles(str(i),'economy') 
econ_all = econ_all + econ_year 


import csv 
keys = econ_all[0].keys() 
with open('econ-mentions.csv', 'wb') as output_file: 

dict_writer = csv.DictWriter(output_file, keys) 
dict_writer.writeheader() 
dict_writer.writerows(econ_all) 

それは私の場合はそうです:dictのは、フィールド名でないフィールドが含まれています:「抽象」

そしてここでは私のコードで私は自分のコードの末尾に次のエラーメッセージが表示されますステートメントは、エラーを防ぐ必要があります。また、私が "writerow"を使用すると、ここで時々言及したように、CSVを作成せずに詳細のリスト全体を取得できます。どんな助けもありがとう!

答えて

0

あなたの問題は何か分かりませんが、このコードはコンテンツ付きのecon-mentions.csvファイルを作成します。

from nytimesarticle import articleAPI 


def parse_articles(articles): 
    news = [] 
    for i in articles['response']['docs']: 
     dic = {} 
     dic['id'] = i['_id'] 
    if i['abstract'] is not None: 
     dic['abstract'] = i['abstract'].encode("utf8") 
     dic['headline'] = i['headline']['main'].encode("utf8") 
     dic['desk'] = i['news_desk'] 
     dic['date'] = i['pub_date'][0:10] # cutting time of day. 
     dic['section'] = i['section_name'] 
     if i['snippet'] is not None: 
      dic['snippet'] = i['snippet'].encode("utf8") 
     dic['source'] = i['source'] 
     dic['type'] = i['type_of_material'] 
     dic['url'] = i['web_url'] 
     dic['word_count'] = i['word_count'] 

     locations = [] 
     for x in range(0,len(i['keywords'])): 
      if 'glocations' in i['keywords'][x]['name']: 
       locations.append(i['keywords'][x]['value']) 
     dic['locations'] = locations 

     subjects = [] 
     for x in range(0,len(i['keywords'])): 
      if 'subject' in i['keywords'][x]['name']: 
       subjects.append(i['keywords'][x]['value']) 
     dic['subjects'] = subjects 
     news.append(dic) 
    return(news) 

def get_articles(date,query): 
    all_articles = [] 
    for i in range(0,100): 
     articles = api.search(q = query, 
       fq = {'source':['Reuters','AP', 'The New York Times']}, 
       begin_date = 20151231, 
       end_date = 20160715, 
       sort='oldest', 
       page = str(i)) 
     articles = parse_articles(articles) 
     all_articles = all_articles + articles 
    return(all_articles) 



if __name__ == "__main__": 
    api = articleAPI('0282db2f333f4f4095edd19f0660c978') 

    articles = api.search(q = 'economy', 
    fq = {'headline':'economy', 'source':['Reuters','AP', 'The New YorkTimes']}, 
    begin_date = 20151231) 

    econ_all = [] 
    for i in range(2015,2016): 
     print 'Processing' + str(i) + '...' 
     econ_year = get_articles(str(i),'economy') 
     econ_all = econ_all + econ_year 

    import csv 
    keys = econ_all[0].keys() 
    with open('econ-mentions.csv', 'wb') as output_file: 
     dict_writer = csv.DictWriter(output_file, keys) 
     dict_writer.writeheader() 
     dict_writer.writerows(econ_all) 
+0

ありがとうございました!興味深いことに、それはコンテンツを作成しますが、それはそれが単語経済のすべての記事をキャプチャするかどうかは分かりません。しかし、これは助けて...私は仕事を続けます。 – user2209633

関連する問題