I am adapting an existing implementation of logistic regression, but I can't figure out what I'm doing wrong. Why won't this converge?

Here is my implementation:

from scipy.optimize import fmin_bfgs 
import numpy as np 
import pandas as pd 
# With help from http://stackoverflow.com/questions/13794754/logistic-regression-using-scipy 
# as well as https://bryantravissmith.com/2015/12/29/implementing-logistic-regression-from-scratch-part-2-python-code/ 

def sigma(features, weights): 
    """returns sigma(<w,x>)""" 
    return 1/(1 + np.exp(-features.dot(weights))) 


def log_likelihood(weights, features, labels): 
    """calculates -ln p(t|w)""" 
    s = sigma(features, weights) 
    #s += 1e-24 # pseudocount to prevent logs of 0 
    t = labels * np.log(s + 1e-24) 
    t2 = (1 - labels) * (np.log((1 - s) + 1e-24)) 
    ll = (t + t2).sum() 
    print -ll 
    return -ll 


def gradient_log_likelihood(weights, features, labels): 
    """calculates the gradient (Jacobian) of the log likelihood""" 
    error = labels - sigma(features, weights) 
    grad = (error * features).sum(axis=0) 
    return grad.reshape(grad.shape[0], 1) 

Here is a sample dataset:

labels = np.array([0, 1, 1]).reshape(3, 1) 
df = pd.DataFrame.from_dict({'a': [1,2,3], 'b': [2,3,4], 'c': [6,7,8]}) 

n, m = df.shape 
weights = np.zeros(m + 1).reshape(m + 1, 1) # zero vector of starting weights 

# add the intercept column 
features = np.ones((n, m + 1)) # make matrix with all 1's 
features[:,1:] = df # replace the 1's in all columns after column 0 with actual data 

If I run each of these functions individually with the starting weight vector, they work. However, once I try to optimize, I get a shape error:

error = (labels - sigma(features, weights)) 

This line turns error from a 3×1 vector into a 3×3 matrix.
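For reference, here is a minimal sketch of the broadcasting at work (plain NumPy; the 0.5 values stand in for what sigma() returns at zero weights). The 3×3 result appears whenever sigma() hands back a 1-D array instead of a column vector:

import numpy as np

labels = np.array([0, 1, 1]).reshape(3, 1)  # column vector, shape (3, 1)
s = np.full(3, 0.5)                         # 1-D array, shape (3,)
error = labels - s
print(error.shape)  # (3, 3): NumPy broadcasts (3, 1) against (3,)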

optimized = fmin_bfgs(log_likelihood, x0=weights, args=(features, labels), gtol=1e-4, fprime=gradient_log_likelihood) 

ValueError        Traceback (most recent call last) 
<ipython-input-26-34c3cde48ac4> in <module>() 
----> 1 optimized = fmin_bfgs(log_likelihood, x0=weights, args=(features, labels), gtol=1e-4, fprime=gradient_log_likelihood) 

/Users/ifiddes/anaconda/lib/python2.7/site-packages/scipy/optimize/optimize.pyc in fmin_bfgs(f, x0, fprime, args, gtol, norm, epsilon, maxiter, full_output, disp, retall, callback) 
    791    'return_all': retall} 
    792 
--> 793  res = _minimize_bfgs(f, x0, args, fprime, callback=callback, **opts) 
    794 
    795  if full_output: 

/Users/ifiddes/anaconda/lib/python2.7/site-packages/scipy/optimize/optimize.pyc in _minimize_bfgs(fun, x0, args, jac, callback, gtol, norm, eps, maxiter, disp, return_all, **unknown_options) 
    845  else: 
    846   grad_calls, myfprime = wrap_function(fprime, args) 
--> 847  gfk = myfprime(x0) 
    848  k = 0 
    849  N = len(x0) 

/Users/ifiddes/anaconda/lib/python2.7/site-packages/scipy/optimize/optimize.pyc in function_wrapper(*wrapper_args) 
    287  def function_wrapper(*wrapper_args): 
    288   ncalls[0] += 1 
--> 289   return function(*(wrapper_args + args)) 
    290 
    291  return ncalls, function_wrapper 

<ipython-input-3-9678bc972b41> in gradient_log_likelihood(weights, features, labels) 
     2   """calculates the gradient (Jacobian) of the log likelihood""" 
     3   error = labels - sigma(features, weights) 
----> 4   grad = (error * features).sum(axis=0) 
     5   return grad.reshape(grad.shape[0], 1) 
     6 

ValueError: operands could not be broadcast together with shapes (3,3) (3,4) 

Answer:

The problem is somehow with that line. Note that if you print error while running gradient_log_likelihood(weights, features, labels) on its own, you get this output:

[[-0.5]
 [ 0.5]
 [ 0.5]]

But if you run the optimization, then in addition to the ValueError, it becomes:

[[-0.5 -0.5 -0.5]
 [ 0.5  0.5  0.5]
 [ 0.5  0.5  0.5]]

This is because labels - sigma(features, weights) changes shape inside the optimizer.
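The reason for the shape change (my explanation, not from the original answer): fmin_bfgs flattens x0 into a 1-D array before handing it to the callbacks, so inside the optimizer sigma(features, weights) comes back with shape (3,) instead of (3, 1), and labels - s broadcasts out to (3, 3). A quick sketch that shows the flattening, using a trivial stand-in objective:

from scipy.optimize import fmin_bfgs
import numpy as np

def objective(w):
    print(w.shape)  # prints (4,): the (4, 1) x0 has been flattened
    return float((w ** 2).sum())

fmin_bfgs(objective, x0=np.zeros((4, 1)), disp=0)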

You can investigate why that is; the flattening sketch above is one explanation. But even if you hack around it by keeping only the first column, error = (labels - sigma(features, weights)).T[0].reshape(3,1), so that gradient_log_likelihood(weights, features, labels) runs again on its own, the optimization fails with a new error, as shown below.
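Spelled out as a full function, my reading of that hack (this function body is reconstructed from the one-liner above, not code from the original post):

def gradient_log_likelihood(weights, features, labels):
    """gradient of the log likelihood, with the first-column hack applied"""
    # keep only the first column of the broadcast (3, 3) error matrix
    error = (labels - sigma(features, weights)).T[0].reshape(3, 1)
    grad = (error * features).sum(axis=0)
    return grad.reshape(grad.shape[0], 1)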

optimized = fmin_bfgs(log_likelihood, x0=weights, args=(features, labels), gtol=1e-3, fprime=gradient_log_likelihood) 

6.23832462504 
--------------------------------------------------------------------------- 
ValueError        Traceback (most recent call last) 
<ipython-input-135-d7e8b04daeba> in <module>() 
    ----> 1 optimized = fmin_bfgs(log_likelihood, x0=weights, args=(features, labels), gtol=1e-3, fprime=gradient_log_likelihood) 

    /Library/Python/2.7/site-packages/scipy/optimize/optimize.pyc in fmin_bfgs(f, x0, fprime, args, gtol, norm, epsilon, maxiter, full_output, disp, retall, callback) 
     791    'return_all': retall} 
     792 
    --> 793  res = _minimize_bfgs(f, x0, args, fprime, callback=callback, **opts) 
     794 
     795  if full_output: 

    /Library/Python/2.7/site-packages/scipy/optimize/optimize.pyc in _minimize_bfgs(fun, x0, args, jac, callback, gtol, norm, eps, maxiter, disp, return_all, **unknown_options) 
     863    alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \ 
     864      _line_search_wolfe12(f, myfprime, xk, pk, gfk, 
    --> 865           old_fval, old_old_fval) 
     866   except _LineSearchError: 
     867    # Line search failed to find a better solution. 

    /Library/Python/2.7/site-packages/scipy/optimize/optimize.pyc in _line_search_wolfe12(f, fprime, xk, pk, gfk, old_fval, old_old_fval, **kwargs) 
     697  ret = line_search_wolfe1(f, fprime, xk, pk, gfk, 
     698        old_fval, old_old_fval, 
    --> 699        **kwargs) 
     700 
     701  if ret[0] is None: 

    /Library/Python/2.7/site-packages/scipy/optimize/linesearch.pyc in line_search_wolfe1(f, fprime, xk, pk, gfk, old_fval, old_old_fval, args, c1, c2, amax, amin, xtol) 
     95   return np.dot(gval[0], pk) 
     96 
    ---> 97  derphi0 = np.dot(gfk, pk) 
     98 
     99  stp, fval, old_fval = scalar_search_wolfe1(

    ValueError: shapes (4,1) and (4,1) not aligned: 1 (dim 1) != 4 (dim 0) 
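For completeness, here is a sketch of one way to get the whole thing running (my own reworking, not code from the original question or answer). Two changes: keep weights and labels 1-D, since fmin_bfgs flattens x0 anyway, and flip the sign of the gradient, because log_likelihood returns -ln p(t|w) while the original gradient_log_likelihood computes the gradient of +ln p(t|w), i.e. X^T(t - sigma(Xw)):

from scipy.optimize import fmin_bfgs
import numpy as np
import pandas as pd

def sigma(features, weights):
    """returns sigma(<w,x>)"""
    return 1 / (1 + np.exp(-features.dot(weights)))

def neg_log_likelihood(weights, features, labels):
    """-ln p(t|w), with a small constant to keep the logs finite"""
    s = sigma(features, weights)
    return -(labels * np.log(s + 1e-24) + (1 - labels) * np.log(1 - s + 1e-24)).sum()

def neg_gradient(weights, features, labels):
    """gradient of -ln p(t|w) = -X^T(t - sigma(Xw)); the sign matches the objective"""
    return -features.T.dot(labels - sigma(features, weights))

labels = np.array([0, 1, 1])    # 1-D, shape (3,)
df = pd.DataFrame({'a': [1, 2, 3], 'b': [2, 3, 4], 'c': [6, 7, 8]})
n, m = df.shape
features = np.ones((n, m + 1))  # intercept column of 1's, then the data
features[:, 1:] = df
weights = np.zeros(m + 1)       # 1-D, shape (4,), same shape fmin_bfgs passes back

optimized = fmin_bfgs(neg_log_likelihood, x0=weights, args=(features, labels),
                      gtol=1e-4, fprime=neg_gradient)

One caveat: this three-point dataset is linearly separable, so the likelihood has no finite maximizer; expect the weights to grow large and fmin_bfgs to stop with a precision-loss warning rather than a clean convergence.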