R の caret パッケージにおける predict() 関数のエラー：「newdata」の行数が異なる、「type」が受け付けられない

caret パッケージを使用してロジスティック回帰分析を実行しています。predict() 関数を除けば、ここまではすべて問題なく動作しています。

データが入力されています。
R によると、type パラメータには raw か prob を指定することになっています。しかし raw を指定すると、最後の列（二項変数の値）の正確なコピーがそのまま出力されるだけです。prob を指定すると、次のエラーになります：

"Error in dimnames(out)[[2]] <- modelFit$obsLevels : length of 'dimnames' [2] not equal to array extent In addition: Warning message: 'newdata' had 7 rows but variables found have 18 rows"

# Reproduction script from the question: builds an 18 x 6 data set, fits a
# logistic regression through caret, and calls predict(), which produces the
# warning/error quoted above.

# Installs pbkrtest, caret and e1071 each time the script is run.
install.packages("pbkrtest") 
install.packages("caret") 
install.packages('e1071', dependencies=TRUE) 
#install.packages('caret', dependencies = TRUE) 
# require() and library() both attach caret; the second call is redundant.
require(caret) 
library(caret) 

# 18 x 6 numeric matrix filled column by column (byrow = FALSE): columns 1-5
# hold the same values as the vectors a-e defined below, and column 6 holds
# the 0/1 values of f. No dimnames are given, so the columns are unnamed.
A=matrix(
    c(
    64830,18213,4677,24761,9845,17504,22137,12531,5842,28827,51840,4079,1000,2069,969,9173,11646,946,66161,18852,5581,27219,10159,17527,23402,11409,8115,31425,55993,0,0,1890,1430,7873,12779,627,68426,18274,5513,25687,10971,14104,19604,13438,6011,30055,57242,0,0,2190,1509,8434,10492,755,69716,18366,5735,26556,11733,16605,20644,15516,5750,31116,64330,0,0,1850,1679,9233,12000,500,73128,18906,5759,28555,11951,19810,22086,17425,6152,28469,72020,0,0,1400,1750,8599,12000,500,1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,1,1,1 
), 

    nrow = 18, 
    ncol = 6, 
    byrow = FALSE) # matrix() has no "bycol" argument
# The same data again as six standalone vectors of length 18 (a-e are the
# numeric columns, f is the 0/1 column).
a<-c(64830,18213,4677,24761,9845,17504,22137,12531,5842,28827,51840,4079,1000,2069,969,9173,11646,946) 
b<-c(66161,18852,5581,27219,10159,17527,23402,11409,8115,31425,55993,0,0,1890,1430,7873,12779,627) 
c<-c(68426,18274,5513,25687,10971,14104,19604,13438,6011,30055,57242,0,0,2190,1509,8434,10492,755) 
d<-c(69716,18366,5735,26556,11733,16605,20644,15516,5750,31116,64330,0,0,1850,1679,9233,12000,500) 
e<-c(73128,18906,5759,28555,11951,19810,22086,17425,6152,28469,72020,0,0,1400,1750,8599,12000,500) 
f<-c(1,1,1,0,1,0,0,0,0,1,0,1,1,1,1,1,1,1) 
# End of the vector definitions.

# Row count of A and its column count minus one; neither value is used again
# in the code shown here.
n<-nrow(A); 
K<-ncol(A)-1; 

# Draw the training row indices from the standalone vector f (not from a
# column of A), then split the unnamed matrix A by those rows.
Train <- createDataPartition(f, p=0.6, list=FALSE) # 60% of the data set is used for training.
training <- A[ Train, ] 
testing <- A[ -Train, ] 
nrow(training) 

# This is the logistic formula:
# estimates from logistic regression characterize the relationship between the predictor and response variable on a log-odds scale
# NOTE(review): `training` is a matrix without columns named a-e or f, so the
# formula terms are presumably resolved from the global vectors above (18
# values each) rather than from `data`; that would match the "variables found
# have 18 rows" warning - confirm.
mod_fit <- train(f ~ a + b + c + d +e, data=training, method="glm", family="binomial") 
mod_fit 

# This is the exponential function to calculate the odds ratios for each predictor:
exp(coef(mod_fit$finalModel)) 

# NOTE(review): f is numeric here, so caret presumably fits a regression
# model, for which type = "prob" would not be available; storing the outcome
# as a factor is the usual remedy - confirm against the caret documentation.
predict(mod_fit, newdata=training) 
predict(mod_fit, newdata=testing, type="prob")

出典

2016-05-27 user6318255

単なる興味からお聞きしますが、なぜデータフレームと a〜f のベクトルの両方を定義する必要があるのですか？ – zyurnaidi

@zyurnaidi私は二項変数を他の列（f vs a-e）と比較していたので、これを行いました。私はロジスティック式を実行する他の方法を知らなかった。より良い方法がありますか？ – user6318255

確かに、データフレームとしてデータを設定し、各カラムにその名前でアクセスするだけです。答えを見てください。 – zyurnaidi

質問の意図を正しく理解できているか自信がありませんが、A は (a, b, c, d, e, f) を列に持つ行列です。したがって、2 種類のオブジェクトを作成する必要はありません。

# Install the required packages only when they are missing, so that re-running
# the script does not reinstall them every time.
if (!requireNamespace("pbkrtest", quietly = TRUE)) {
  install.packages("pbkrtest")
}
if (!requireNamespace("caret", quietly = TRUE)) {
  install.packages("caret")
}
if (!requireNamespace("e1071", quietly = TRUE)) {
  install.packages("e1071", dependencies = TRUE)
}
# library() stops with an error if caret cannot be loaded; the additional
# require() call was redundant.
library(caret)

# 18 observations x 6 variables, filled column by column (byrow = FALSE).
# Columns a-e are the numeric predictors; column f is the 0/1 outcome.
A <- matrix(
  c(
    # a
    64830, 18213, 4677, 24761, 9845, 17504, 22137, 12531, 5842,
    28827, 51840, 4079, 1000, 2069, 969, 9173, 11646, 946,
    # b
    66161, 18852, 5581, 27219, 10159, 17527, 23402, 11409, 8115,
    31425, 55993, 0, 0, 1890, 1430, 7873, 12779, 627,
    # c
    68426, 18274, 5513, 25687, 10971, 14104, 19604, 13438, 6011,
    30055, 57242, 0, 0, 2190, 1509, 8434, 10492, 755,
    # d
    69716, 18366, 5735, 26556, 11733, 16605, 20644, 15516, 5750,
    31116, 64330, 0, 0, 1850, 1679, 9233, 12000, 500,
    # e
    73128, 18906, 5759, 28555, 11951, 19810, 22086, 17425, 6152,
    28469, 72020, 0, 0, 1400, 1750, 8599, 12000, 500,
    # f
    1, 1, 1, 0, 1, 0, 0, 0, 0,
    1, 0, 1, 1, 1, 1, 1, 1, 1
  ),
  nrow = 18,
  ncol = 6,
  byrow = FALSE, # matrix() has no "bycol" argument
  dimnames = list(NULL, c("a", "b", "c", "d", "e", "f"))
)

# A single named data frame replaces the separate matrix and a-f vectors, so
# the model formula can find its variables in `data`.
A <- data.frame(A)
# Store the outcome as a factor so that caret treats the model as a
# classification problem.
A$f <- as.factor(A$f)

# Keep 60% of the rows for training and the rest for testing. The split is
# random; call set.seed() beforehand if it has to be reproducible.
Train <- createDataPartition(A$f, p = 0.6, list = FALSE)
training <- A[Train, ]
testing <- A[-Train, ]
nrow(training)

そのうえで、次のように実行できます：

# Fit a logistic regression (binomial GLM) through caret: `f` is the outcome
# and a-e are the predictors, all taken from the `training` data frame.
mod_fit <- train(f ~ a + b + c + d + e, data = training,
                 method = "glm", family = "binomial")
mod_fit

# Exponentiate the coefficients to obtain the odds ratio for each predictor.
exp(coef(mod_fit$finalModel))

# Predict from the predictor columns only. setdiff() is used instead of
# `-which(colnames(x) == "f")` because a negative index built from an empty
# which() result selects zero columns when "f" is absent.
predict(
  mod_fit,
  newdata = training[, setdiff(colnames(training), "f"), drop = FALSE]
)
predict(
  mod_fit,
  newdata = testing[, setdiff(colnames(testing), "f"), drop = FALSE]
)

出典

2016-05-27 08:16:56 CClaire

ロジスティック回帰を行おうとしているのですから、f は因子（factor）である必要があります。生成される 'warnings()' を参照してください。 – phiver

はい、そのとおりです。ただ、それは単なる警告で、予測結果は同じです。回答を修正します。 – CClaire

簡潔に答えると、predict に渡すデータには、予測対象の変数（目的変数）である f を含めるべきではありません。なお、回帰はデータセット全体（18 観測値）を使って実行されています。したがって、次のようにする必要があります：

# Drop the last column (the outcome `f`) so that only the predictors are
# passed to predict(). The closing parenthesis of ncol() was missing.
predict(mod_fit, newdata = training[, -ncol(training)])
predict(mod_fit, newdata = testing[, -ncol(testing)])

警告メッセージ 'newdata' had 11 rows but variables found have 18 rows が出るのは、回帰をデータセット全体（18 観測値）で実行しているのに、予測にはその一部（11 行または 7 行）だけを使っているためです。

EDIT：データの作成と glm の処理を簡素化するには、次のようにできます：

library(caret)

# Build the data set directly as a data frame with named columns: a-e are the
# numeric predictors and f is the binary outcome.
A <- data.frame(
  a = c(64830, 18213, 4677, 24761, 9845, 17504, 22137, 12531, 5842,
        28827, 51840, 4079, 1000, 2069, 969, 9173, 11646, 946),
  b = c(66161, 18852, 5581, 27219, 10159, 17527, 23402, 11409, 8115,
        31425, 55993, 0, 0, 1890, 1430, 7873, 12779, 627),
  c = c(68426, 18274, 5513, 25687, 10971, 14104, 19604, 13438, 6011,
        30055, 57242, 0, 0, 2190, 1509, 8434, 10492, 755),
  d = c(69716, 18366, 5735, 26556, 11733, 16605, 20644, 15516, 5750,
        31116, 64330, 0, 0, 1850, 1679, 9233, 12000, 500),
  e = c(73128, 18906, 5759, 28555, 11951, 19810, 22086, 17425, 6152,
        28469, 72020, 0, 0, 1400, 1750, 8599, 12000, 500),
  # Stored as a factor so that caret fits a classification model rather than
  # a regression on the 0/1 values.
  f = factor(c(1, 1, 1, 0, 1, 0, 0, 0, 0,
               1, 0, 1, 1, 1, 1, 1, 1, 1))
)

# `f` exists only as a column of `A` in this snippet, so it has to be
# referenced as A$f. 60% of the data set is used for training.
Train <- createDataPartition(A$f, p = 0.6, list = FALSE)
training <- A[Train, ]
testing <- A[-Train, ]

# Because `training` is a data frame with columns a-f, the formula variables
# are taken from `data`.
mod_fit <- train(f ~ a + b + c + d + e, data = training,
                 method = "glm", family = "binomial")

出典

2016-05-27 08:21:41 zyurnaidi

R の caret パッケージにおける predict() 関数のエラー：「newdata」の行数が異なる、「type」が受け付けられない

答えて

関連する問題