I have been debugging this code but cannot fix it. I am a Python beginner, so I have not managed to replace the urllib2 calls with requests; when I try requests, I run into a KeyError saying 'id' does not exist. With urllib2 I am hitting urllib2.HTTPError: HTTP Error 400: Bad Request. Any help would be appreciated.

Python code:

import os 
os.environ['http_proxy']='' 
import urllib 
import urllib2 
import json 
import mysql.connector 
import datetime 
from config import config 
import requests 

def connect_db(): 
    connection = mysql.connector.connect(**config) 
    return connection 


def create_post_url(graph_url, APP_ID, APP_SECRET): 
    #create authenticated post URL 
    post_args = "/posts/?key=value&access_token=" + APP_ID + "|" + APP_SECRET 
    post_url = graph_url + post_args 

    return post_url 

def render_to_json(graph_url):
    #render graph url call to JSON
    web_response = urllib2.Request(graph_url)
    response = urllib2.urlopen(web_response)
    readable_data = response.read()
    json_data = json.loads(readable_data)

    return json_data

def scrape_posts_by_date(graph_url, date, post_data, APP_ID, APP_SECRET):
    #render URL to JSON
    page_posts = render_to_json(graph_url)

    #extract next page
    next_page = page_posts["paging"]["next"]

    #grab all posts
    page_posts = page_posts["data"]

    #boolean to tell us when to stop collecting
    collecting = True

    #for each post capture data
    for post in page_posts:
        try:
            #include likes_count in the row so it matches insert_posts
            likes_count = get_likes_count(post["id"], APP_ID, APP_SECRET)
            current_post = [post["id"], post["message"], likes_count,
                            post["created_time"],
                            post["shares"]["count"]]

        except Exception:
            current_post = ["error", "error", "error", "error", "error"]

        #created_time sits at index 3
        if current_post[3] != "error":
            print date
            print current_post[3]
            #compare dates
            if date <= current_post[3]:
                post_data.append(current_post)

            elif date > current_post[3]:
                print "Done collecting"
                collecting = False
                break

    #if we still don't meet date requirements, run on next page
    if collecting:
        scrape_posts_by_date(next_page, date, post_data, APP_ID, APP_SECRET)

    return post_data

def get_likes_count(post_id, APP_ID, APP_SECRET): 
    #create Graph API Call 
    graph_url = "https://graph.facebook.com/" 
    likes_args = post_id + "/likes?summary=true&key=value&access_token=" + APP_ID + "|" + APP_SECRET
    likes_url = graph_url + likes_args 
    likes_json = render_to_json(likes_url) 

    #pick out the likes count 
    count_likes = likes_json["summary"]["total_count"] 

    return count_likes 

def create_comments_url(graph_url, post_id, APP_ID, APP_SECRET): 
    #create Graph API Call 
    comments_args = post_id + "/comments/?key=value&access_token=" + APP_ID + "|" + APP_SECRET 
    comments_url = graph_url + comments_args 

    return comments_url 

def get_comments_data(comments_url, comment_data, post_id):
    #render URL to JSON, keeping the full response so we can paginate
    comments_json = render_to_json(comments_url)
    comments = comments_json["data"]

    #for each comment capture data
    for comment in comments:
        try:
            current_comments = [comment["id"], comment["message"], comment["like_count"],
                                comment["created_time"], post_id]
            print current_comments
            comment_data.append(current_comments)

        except Exception:
            current_comments = ["error", "error", "error", "error", "error"]

    #check if there is another page
    try:
        #extract next page ("paging" lives on the full response, not on "data")
        next_page = comments_json["paging"]["next"]
    except Exception:
        next_page = None

    #if we have another page, recurse
    if next_page is not None:
        get_comments_data(next_page, comment_data, post_id)

    return comment_data

def main(): 
    #simple data pull App Secret and App ID 
    APP_SECRET = "app_secret" 
    APP_ID = "app_id" 

    #to find go to page's FB page, at the end of URL find username 
    #e.g. http://facebook.com/walmart, walmart is the username 
    list_companies = ["walmart", "cisco", "pepsi", "facebook"] 
    graph_url = "https://graph.facebook.com/" 

    #the time of last weeks crawl 
    last_crawl = datetime.datetime.now() - datetime.timedelta(weeks=1) 
    last_crawl = last_crawl.isoformat() 

    #create db connection 
    connection = connect_db() 
    cursor = connection.cursor() 

    #SQL statement for adding Facebook page data to database 
    insert_info = ("INSERT INTO page_info " 
        "(fb_id, likes, talking_about, username)" 
        "VALUES (%s, %s, %s, %s)") 

    #SQL statement for adding post data    
    insert_posts = ("INSERT INTO post_info " 
        "(fb_post_id, message, likes_count, time_created, shares, page_id)" 
        "VALUES (%s, %s, %s, %s, %s, %s)") 

    #SQL statement for adding comment data 
    insert_comments = ("INSERT INTO comment_info " 
         "(comment_id, message, likes_count, time_created, post_id)" 
         "VALUES (%s, %s, %s, %s, %s)") 

    for company in list_companies:
        #make graph api url with company username
        current_page = graph_url + company

        #open public page in facebook graph api
        json_fbpage = render_to_json(current_page)

        #gather our page level JSON Data
        page_data = [json_fbpage["id"], json_fbpage["likes"],
                     json_fbpage["talking_about_count"],
                     json_fbpage["username"]]
        print page_data

        #extract post data (pass the app credentials the function expects)
        post_url = create_post_url(current_page, APP_ID, APP_SECRET)
        post_data = []
        post_data = scrape_posts_by_date(post_url, last_crawl, post_data,
                                         APP_ID, APP_SECRET)

        print post_data

        #insert the data we pulled into db
        cursor.execute(insert_info, page_data)

        #grab primary key
        last_key = cursor.lastrowid

        #loop through and insert data
        for post in post_data:
            post.append(last_key)
            cursor.execute(insert_posts, post)

            #capture post id of data just inserted
            post_key = cursor.lastrowid
            print post_key
            comment_url = create_comments_url(graph_url, post[0], APP_ID, APP_SECRET)

            #start a fresh list per post so earlier comments are not re-inserted
            comment_data = []
            comments = get_comments_data(comment_url, comment_data, post_key)

            #insert comments
            for comment in comments:
                cursor.execute(insert_comments, comment)

        #commit the data to the db
        connection.commit()

    connection.close()


if __name__ == "__main__": 
    main()  

This is the error I am getting:

Traceback (most recent call last):
  File "script.py", line 210, in <module>
    main()
  File "script.py", line 164, in main
    json_fbpage = render_to_json(current_page)
  File "script.py", line 26, in render_to_json
    response = urllib2.urlopen(web_response)
  File "/usr/lib/python2.7/urllib2.py", line 127, in urlopen
    return _opener.open(url, data, timeout)
  File "/usr/lib/python2.7/urllib2.py", line 410, in open
    response = meth(req, response)
  File "/usr/lib/python2.7/urllib2.py", line 523, in http_response
    'http', request, response, code, msg, hdrs)
  File "/usr/lib/python2.7/urllib2.py", line 448, in error
    return self._call_chain(*args)
  File "/usr/lib/python2.7/urllib2.py", line 382, in _call_chain
    result = func(*args)
  File "/usr/lib/python2.7/urllib2.py", line 531, in http_error_default
    raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
urllib2.HTTPError: HTTP Error 400: Bad Request

Please edit your question so that it includes the complete error. – IanAuld


There is too much code in your question. Try to pare it down to just the part that shows the problem. As @IanAuld said, copy and paste the full traceback. requests really is better than urllib, but use whichever you want. –


I have added the full error. –

Answer


The error comes from the page-info URL, which requires your access token. The Graph API throws this same error whenever a page-info call is made without an access token.

To add the access token, you need to change how the current_page variable is built.
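
For example, here is a minimal sketch of the fix, assuming an app access token of the form APP_ID|APP_SECRET (the same form the question's create_post_url already builds); the placeholder credentials and the walmart page are only illustrative:

import urllib2
import json

APP_ID = "app_id"          #placeholder, use your real app ID
APP_SECRET = "app_secret"  #placeholder, use your real app secret
graph_url = "https://graph.facebook.com/"
company = "walmart"

#append the app access token so the page-info call is authenticated
current_page = (graph_url + company +
                "?access_token=" + APP_ID + "|" + APP_SECRET)

response = urllib2.urlopen(current_page)
json_fbpage = json.loads(response.read())
print json_fbpage["id"]

With that change, render_to_json(current_page) in main() should receive a 200 response instead of the 400, provided the credentials are valid.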

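As for the KeyError you saw with requests: when a Graph API call fails, the response body is a JSON object with an "error" key rather than the fields you asked for, so indexing "id" raises KeyError. Here is a hedged sketch of render_to_json rewritten with requests (assuming the requests package is installed) that surfaces the API error message instead:

import requests

def render_to_json(graph_url):
    #fetch the URL and decode the JSON body
    response = requests.get(graph_url)
    json_data = response.json()

    #failed Graph API calls return {"error": {...}} with no "id" field;
    #raising here is clearer than a KeyError further down
    if "error" in json_data:
        raise ValueError(json_data["error"].get("message", "Graph API error"))

    return json_data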