0
トレーニングセットとテストセットにデータセットの分割を実装しようとしていますが、45行目から関数に問題が発生しました。実行した後、プログラムは「KeyError例外:667952」と返す(エラーで数iがプログラムを実行するたびに異なる)トレーニングとテストセットの分割
1 import numpy as np
2 import pandas as pd
3 import random
4
5 data_file = pd.read_csv('loan.csv')
6
7 # variable preprocessing
8
9 data_file['loan_status'] = np.where(data_file['loan_status'].isin(['Fully Paid', 'Current']), 1, 0)
10 loan_stat=data_file['loan_status']
11
12 m = {
13 'n/a': 0,
14 '< 1 year': 0,
15 '1 year': 1,
16 '2 years': 2,
17 '3 years': 3,
18 '4 years': 4,
19 '5 years': 5,
20 '6 years': 6,
21 '7 years': 7,
22 '8 years': 8,
23 '9 years': 9,
24 '10+ years': 10
25 }
26 emp_length=data_file.emp_length.map(m)
27
28 annual_inc=data_file['annual_inc']
29 delinq_2yrs=data_file['delinq_2yrs']
30 dti=data_file['dti']
31 loan_amnt=data_file['loan_amnt']
32 installment=data_file['installment']
33 int_rate=data_file['int_rate']
34 total_acc=data_file['total_acc']
35 open_acc=data_file['open_acc']
36 pub_rec=data_file['pub_rec']
37 acc_now_delinq=data_file['acc_now_delinq']
38
39 #variables combined into one dataset
40
41 data_set=[annual_inc, delinq_2yrs, dti, emp_length, loan_amnt, installment,
42 int_rate, open_acc, total_acc, acc_now_delinq, loan_stat]
43 result=pd.concat(data_set,axis=1)
44
45 def splitDataSet(x, splitRatio):
46 trainSize = int(len(x)*splitRatio)
47 trainSet=[]
48 copy=x
49 while len(trainSet)<trainSize:
50 index=random.randrange(len(copy))
51 trainSet.append(copy.pop(index))
52 return[trainSet, copy]
53
54 splitRatio=0.67
55 train, test = splitDataSet(result, splitRatio)
56 print(train)
誰もがこの障害を克服する方法を知っていますか? ありがとう