Naver(韓国版Googleのような検索サービスです :P)のクローラーを作っています。このコードにはもう1週間取り組んでいて、解決すべき課題があと1つだけ残っています。下のコードでは、Naver APIを使ってデータをクロールし、ループのたびにレスポンスを「js」に受け取っています。やりたいことは、ループごとに作られるDataFrame(dfdfdf)をすべて結合し、最後に1つにまとめることだけです。ところが、結果にはいつも最後のループのデータしか表示されません。要するに、ループを回すたびにDataFrameを追加していきたいのです。joinで結合しようとしましたが、うまく結合されません。解決方法を教えてください。また、以下のコードにおかしな点があれば、それも教えてください!
Naver Crawler:Pythonで各ループのDataFrameを結合する
import os
import sys
import urllib.request
import pandas as pd
import json
import numpy as np
from datetime import datetime, timedelta
import time
# Load the keyword hierarchy sheet. The context manager closes the workbook
# handle as soon as the sheet has been parsed.
with pd.ExcelFile('mat_hierarchy.xlsx') as workbook:
    ex = workbook.parse('Sheet1')

# Candidate keywords: every value of the 4th column, then every value of the
# 3rd column (positional, 0-based columns 3 and 2).
DNA1 = []
DNA1.extend(ex.iloc[:, 3])
DNA1.extend(ex.iloc[:, 2])

# De-duplicate while keeping the order of first appearance.
# NOTE(review): empty cells would presumably arrive here as NaN floats and are
# not filtered out - confirm the sheet has no blanks in these two columns.
seen = set()
DNA = []
for item in DNA1:
    if item not in seen:
        seen.add(item)
        DNA.append(item)
# ---- Date axis ------------------------------------------------------------
# Weekly sampling: one timestamp per Monday, from 2016-01-01 up to two days
# before today. (Leaving out `freq` would give a daily axis instead.)
dd = pd.date_range(
    start='2016-01-01',
    end=datetime.now().date() - timedelta(days=2),
    freq='W-MON',
)

# Last day requested from the API: yesterday, formatted as YYYY-MM-DD.
setendDate = datetime.now().date() - timedelta(days=1)
endDate = setendDate.isoformat()

# Empty frame that carries only the date index; results are joined onto it.
Data = pd.DataFrame(index=dd)

# ---- Naver API connection -------------------------------------------------
client_id = "ID"
client_secret = "PW"
url = "https://openapi.naver.com/v1/datalab/search"

# ---- Request body fragments -----------------------------------------------
# The JSON body is assembled by concatenating these pieces around the end
# date and the keywords:
#   intro + <endDate> + endDate + <kw> + keywords + <kw>
#         [+ groupName + <kw> + keywords + <kw>]... + last
body_intro = '{"startDate":"2016-01-01","endDate":"'
body_endDate = '","timeUnit":"date","keywordGroups":[{"groupName":"'
body_keywords = '","keywords":["'
body_groupName = '"]},{"groupName":"'
body_last = '"]}],"ages":["1","2","3","4","5","6","7","8","9","10","11"]}'
# Position of the first keyword to request, and how many keyword groups go
# into one API call.
# NOTE(review): starting at 2270 skips every keyword before that position,
# which may be why only the tail of the data shows up in the output - set it
# to 0 to crawl the whole list. Keep it a multiple of BATCH_SIZE.
# NOTE(review): 5 is presumably the API's per-request group limit - confirm.
START_INDEX = 2270
BATCH_SIZE = 5


def build_request_body(keywords, end_date, start_date="2016-01-01"):
    """Return the JSON request body for one batch of keywords.

    Each keyword becomes its own keyword group, named after the keyword.
    The body is serialised with ``json.dumps`` so keywords containing quotes
    or backslashes are escaped correctly; for plain keywords the output is
    the same compact JSON the hand-concatenated template produced.

    Args:
        keywords: Keyword strings to put into this request.
        end_date: Last day of the period, as ``YYYY-MM-DD``.
        start_date: First day of the period, as ``YYYY-MM-DD``.

    Returns:
        The request body as a ``str``.

    Raises:
        TypeError: If a keyword is not a ``str`` (for example a NaN read
            from an empty spreadsheet cell).
    """
    for keyword in keywords:
        if not isinstance(keyword, str):
            raise TypeError(
                "keyword must be str, got %r" % (keyword,)
            )
    payload = {
        "startDate": start_date,
        "endDate": end_date,
        "timeUnit": "date",
        "keywordGroups": [
            {"groupName": keyword, "keywords": [keyword]}
            for keyword in keywords
        ],
        "ages": [str(age) for age in range(1, 12)],
    }
    return json.dumps(payload, ensure_ascii=False, separators=(",", ":"))


def fetch_trend(body):
    """POST *body* to the API and return the decoded JSON response.

    Args:
        body: Request body as produced by ``build_request_body``.

    Returns:
        The parsed response as a ``dict``, or ``None`` when the server
        answered with an HTTP error or a status other than 200 (the status
        is printed in that case).
    """
    from urllib.error import HTTPError

    request = urllib.request.Request(url)
    request.add_header("X-Naver-Client-Id", client_id)
    request.add_header("X-Naver-Client-Secret", client_secret)
    request.add_header("Content-Type", "application/json")
    try:
        # The context manager closes the connection after the body is read.
        with urllib.request.urlopen(request, data=body.encode("utf-8")) as response:
            rescode = response.getcode()
            if rescode != 200:
                print("Error Code:" + str(rescode))
                return None
            return json.loads(response.read().decode('utf-8'))
    except HTTPError as err:
        # urlopen raises for 4xx/5xx; report it and let the caller skip this
        # batch instead of aborting the whole crawl.
        print("Error Code:" + str(err.code))
        return None


def results_to_frame(payload):
    """Convert one API response into a DataFrame with one column per keyword.

    Every entry of ``payload['results']`` becomes a column named after the
    entry's ``title`` holding its ``ratio`` values, indexed by ``period``.
    An entry without data becomes a single NaN placed at
    ``payload['startDate']`` so the keyword still gets a column.

    Args:
        payload: Decoded JSON response (``dict``).

    Returns:
        A ``DataFrame`` with a ``DatetimeIndex``; the index is parsed from
        the date strings so that joining onto a datetime-indexed frame does
        not depend on implicit string-to-date coercion.
    """
    columns = []
    for result in payload['results']:
        title = result['title']
        if result['data']:
            column = (
                pd.DataFrame(result['data'])
                .set_index('period')
                .rename(columns={'ratio': title})
            )
        else:
            column = pd.DataFrame(
                [np.nan], columns=[title], index=[payload['startDate']]
            )
        column.index = pd.to_datetime(column.index)
        columns.append(column)
    return pd.concat(columns, axis=1)


# The crawl only runs when the file is executed as a script, so the helpers
# above can be imported without network or file side effects.
if __name__ == "__main__":
    df_list = []
    for i in range(START_INDEX, len(DNA), BATCH_SIZE):
        # Slicing yields a full batch, or the shorter remainder at the end.
        batch = DNA[i:i + BATCH_SIZE]
        print(str(len(batch)))
        d = fetch_trend(build_request_body(batch, endDate))
        if d is not None:
            # Align this batch on the shared date index and keep it.
            dfdfdf = Data.join(results_to_frame(d))
            df_list.append(dfdfdf)
        print("end")
        # Pause between requests.
        time.sleep(.5)

    # Combine every batch side by side; all share the index of `Data`.
    if df_list:
        Final = pd.concat(df_list, axis=1)
        Final.to_csv("Naver123.csv")
    else:
        print("No data collected; Naver123.csv was not written.")
「if」ブロックの間で変わるのは *body* だけですか? – Parfait
はい。変わるのは、リクエストの「body」に入るキーワードだけです。 – Kang