ロジスティック回帰の既存の実装を適用していますが、私が間違っていることを理解できません。ここでロジスティック回帰の実装 - なぜこれが収束しないのですか?
は私の実装である:ここでは
from scipy.optimize import fmin_bfgs
import numpy as np
import pandas as pd
# With help from http://stackoverflow.com/questions/13794754/logistic-regression-using-scipy
# as well as https://bryantravissmith.com/2015/12/29/implementing-logistic-regression-from-scratch-part-2-python-code/
def sigma(features, weights):
"""returns sigma(<w,x>)"""
return 1/(1 + np.exp(-features.dot(weights)))
def log_likelihood(weights, features, labels):
"""calculates -ln p(t|w)"""
s = sigma(features, weights)
#s += 1e-24 # pseudocount to prevent logs of 0
t = labels * np.log(s + 1e-24)
t2 = (1 - labels) * (np.log((1 - s) + 1e-24))
ll = (t + t2).sum()
print -ll
return -ll
def gradient_log_likelihood(weights, features, labels):
"""calculates the gradient (Jacobian) of the log likelihood"""
error = labels - sigma(features, weights)
grad = (error * features).sum(axis=0)
return grad.reshape(grad.shape[0], 1)
は、サンプルデータセットである:私は始まる重みベクトルに個別にこれらの各メソッドを実行する場合
labels = np.array([0, 1, 1]).reshape(3, 1)
df = pd.DataFrame.from_dict({'a': [1,2,3], 'b': [2,3,4], 'c': [6,7,8]})
n, m = df.shape
weights = np.zeros(m + 1).reshape(m + 1, 1) # zero vector of starting weights
# add the intercept column
features = np.ones((n, m + 1)) # make matrix with all 1's
features[:,1:] = df # replace the 1's in all columns after column 0 with actual data
、彼らが実行されます。私は最適化しようとしたらしかし、私は、形状誤差を取得:
error = (labels - sigma(features, weights))
は、3×3のマトリックスの中に3×1のベクトルからerror
を変換します。
optimized = fmin_bfgs(log_likelihood, x0=weights, args=(features, labels), gtol=1e-4, fprime=gradient_log_likelihood)
ValueError Traceback (most recent call last)
<ipython-input-26-34c3cde48ac4> in <module>()
----> 1 optimized = fmin_bfgs(log_likelihood, x0=weights, args=(features, labels), gtol=1e-4, fprime=gradient_log_likelihood)
/Users/ifiddes/anaconda/lib/python2.7/site-packages/scipy/optimize/optimize.pyc in fmin_bfgs(f, x0, fprime, args, gtol, norm, epsilon, maxiter, full_output, disp, retall, callback)
791 'return_all': retall}
792
--> 793 res = _minimize_bfgs(f, x0, args, fprime, callback=callback, **opts)
794
795 if full_output:
/Users/ifiddes/anaconda/lib/python2.7/site-packages/scipy/optimize/optimize.pyc in _minimize_bfgs(fun, x0, args, jac, callback, gtol, norm, eps, maxiter, disp, return_all, **unknown_options)
845 else:
846 grad_calls, myfprime = wrap_function(fprime, args)
--> 847 gfk = myfprime(x0)
848 k = 0
849 N = len(x0)
/Users/ifiddes/anaconda/lib/python2.7/site-packages/scipy/optimize/optimize.pyc in function_wrapper(*wrapper_args)
287 def function_wrapper(*wrapper_args):
288 ncalls[0] += 1
--> 289 return function(*(wrapper_args + args))
290
291 return ncalls, function_wrapper
<ipython-input-3-9678bc972b41> in gradient_log_likelihood(weights, features, labels)
2 """calculates the gradient (Jacobian) of the log likelihood"""
3 error = labels - sigma(features, weights)
----> 4 grad = (error * features).sum(axis=0)
5 return grad.reshape(grad.shape[0], 1)
6
ValueError: operands could not be broadcast together with shapes (3,3) (3,4)