あなたは完璧な作品を書いたコードは、簡潔な一見同じ結果を得るための方法がありますが:CVについて
brglm_model <- brglm(formula = response ~ predictor , family = "binomial", data = train)
brglm_pred <- predict(object = brglm_model, newdata = test , type = "response")
が、あなたは私が推測する数行のコードを記述する必要があります。
#Setting the number of folds, and number of instances in each fold
n_folds <- 5
fold_size <- nrow(dataset) %/% 5
residual <- nrow(dataset) %% 5
#label the instances based on the number of folds
cv_labels <- c(rep(1,fold_size),rep(2,fold_size), rep(3,fold_size), rep(4,fold_size), rep(5,fold_size), rep(5,residual))
# the error term would differ based on each threshold value
t_seq <- seq(0.1,0.9,by = 0.1)
index_mat <- matrix(ncol = (n_folds+1) , nrow = length(t_seq))
index_mat[,1] <- t_seq
# the main loop for calculation of the CV error on each fold
for (i in 1:5){
train <- dataset %>% filter(cv_labels != i)
test <- dataset %>% filter(cv_labels == i)
brglm_cv_model <- brglm(formula = response_var ~ . , family = "binomial", data = train)
brglm_cv_pred <- predict(object = brglm_model, newdata = test , type = "response")
# error formula that you want, e.g. misclassification
counter <- 0
for (treshold in t_seq) {
counter <- counter + 1
conf_mat <- table(factor(test$response_var) , factor(brglm_cv_pred>treshold, levels = c("FALSE","TRUE")))
sen <- conf_mat[2,2]/sum(conf_mat[2,])
# other indices can be computed as follows
#spec <- conf_mat[1,1]/sum(conf_mat[1,])
#prec <- conf_mat[2,2]/sum(conf_mat[,2])
#F1 <- (2*prec * sen)/(prec+sen)
#accuracy <- (conf_mat[1,1]+conf_mat[2,2])/sum(conf_mat)
#here I am only interested in sensitivity
index_mat[counter,(i+1)] <- sen
}
}
# final data.frame would be the mean of sensitivity over each threshold value
final_mat <- matrix(nrow = length(t_seq), ncol = 2)
final_mat[,1] <- t_seq
final_mat[,2] <- apply(X = index_mat[,-1] , MARGIN = 1 , FUN = mean)
final_mat <- data.frame(final_mat)
colnames(final_mat) <- c("treshold","sensitivity")
#why not having a look at the CV-sensitivity of the model over threshold values?
ggplot(data = final_mat) +
geom_line(aes(x = treshold, y = sensitivity), color = "blue")