1
ベルヌーイ・ナイーブベイズのモデルでどれが最良の精度を与えるかを調べるために、ある範囲のアルファ(LaPlace平滑化パラメータ)上でGridSearchCVを使用したいと思います。GridSearchCVの初期化
def binarize_pixels(data, threshold=0.784):
# Initialize a new feature array with the same shape as the original data.
binarized_data = np.zeros(data.shape)
# Apply a threshold to each feature.
for feature in range(data.shape[1]):
binarized_data[:,feature] = data[:,feature] > threshold
return binarized_data
binarized_train_data = binarize_pixels(mini_train_data)
def BNB():
clf = BernoulliNB()
clf.fit(binarized_train_data, mini_train_labels)
scoring = clf.score(mini_train_data, mini_train_labels)
predsNB = clf.predict(dev_data)
print "Bernoulli binarized model accuracy: {:.4}".format(np.mean(predsNB == dev_labels))
私GridSearchクロス検証はそうではないモデルでは、正常に動作:
pipeline = Pipeline([('classifier', BNB())])
def P8(alphas):
gs_clf = GridSearchCV(pipeline, param_grid = alphas, refit=True)
y_predictions = gs_clf.best_estimator_.predict(dev_data)
print classification_report(dev_labels, y_predictions)
alphas = {'alpha' : [0.0, 0.0001, 0.001, 0.01, 0.1, 0.5, 1.0, 2.0, 10.0]}
P8(alphas)
私ははAttributeErrorを得る: 'GridSearchCV' オブジェクトが問題である