Octaveで質問をうまく解決した後、私はOctaveコードをPythonに変換しようとしました。debug numpyオクターブ変換、可能な行列の問題
ここは私のコードであり、fmin_cgでは機能しません。私は人々がPythonであらゆる種類のミニマイザを使うのを見ますが、私は最初に同じ名前のものに固執します。
コード:
# Not yet working
#
#
# Reimplementation of Octave code
#
#
# X = [ones(m, 1) X]
#
# [theta] = trainLinearReg([ones(m, 1) X], y, lambda);
#
# initial_theta = zeros(size(X, 2), 1);
# costFunction = @(t) linearRegCostFunction(X, y, t, lambda);
# options = optimset('MaxIter', 200, 'GradObj', 'on');
# % Minimize using fmincg
# theta = fmincg(costFunction, initial_theta, options);
def lr_cost(X, y, theta, lambda_):
m = X.shape[0]
cost = np.sum(np.power(((X * theta.T) - y), 2))/2/m + np.sum(lambda_/2/m * np.power(theta[:, 0:-1], 2))
return cost
def lr_grad(X, y, theta, lambda_):
m = X.shape[0]
grad = 1/m * X.T * (X*theta.T - y) + lambda_/m * np.vstack((0,theta[:, 0:-1]))
return grad.flatten()
def train_lr(X, y, lambda_):
from scipy.optimize import fmin_cg
initial_theta = np.zeros((X.shape[1], 1))
#initial_theta = np.zeros((1, 2))
print(f'initial_theta {initial_theta.shape}')
print(f'X {X.shape}\n{X}')
print(f'y {y.shape}\n{y}')
theta = fmin_cg(lr_cost, x0=initial_theta, args=(X, y, lambda_), maxiter=200)
#theta = fmin_cg(lr_cost, x0=initial_theta, fprime=lr_grad , args=(X, y, lambda_), maxiter=200)
print(f'theta {theta}')
# return theta
lambda_ = 0
train_lr(np.insert(X, 0 , 1, axis=1), y, lambda_)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-171-6fc8b0780a49> in <module>()
39
40 lambda_ = 0
---> 41 train_lr(np.insert(X, 0 , 1, axis=1), y, lambda_)
<ipython-input-171-6fc8b0780a49> in train_lr(X, y, lambda_)
35 print(f'y {y.shape}\n{y}')
36
---> 37 theta = fmin_cg(lr_cost, x0=initial_theta, args=(X, y, lambda_), maxiter=200)
38 print(f'theta {theta}')
39
/Users/apple/anaconda/lib/python3.6/site-packages/scipy/optimize/optimize.py in fmin_cg(f, x0, fprime, args, gtol, norm, epsilon, maxiter, full_output, disp, retall, callback)
1175 'return_all': retall}
1176
-> 1177 res = _minimize_cg(f, x0, args, fprime, callback=callback, **opts)
1178
1179 if full_output:
/Users/apple/anaconda/lib/python3.6/site-packages/scipy/optimize/optimize.py in _minimize_cg(fun, x0, args, jac, callback, gtol, norm, eps, maxiter, disp, return_all, **unknown_options)
1226 else:
1227 grad_calls, myfprime = wrap_function(fprime, args)
-> 1228 gfk = myfprime(x0)
1229 k = 0
1230 xk = x0
/Users/apple/anaconda/lib/python3.6/site-packages/scipy/optimize/optimize.py in function_wrapper(*wrapper_args)
290 def function_wrapper(*wrapper_args):
291 ncalls[0] += 1
--> 292 return function(*(wrapper_args + args))
293
294 return ncalls, function_wrapper
/Users/apple/anaconda/lib/python3.6/site-packages/scipy/optimize/optimize.py in approx_fprime(xk, f, epsilon, *args)
686
687 """
--> 688 return _approx_fprime_helper(xk, f, epsilon, args=args)
689
690
/Users/apple/anaconda/lib/python3.6/site-packages/scipy/optimize/optimize.py in _approx_fprime_helper(xk, f, epsilon, args, f0)
620 """
621 if f0 is None:
--> 622 f0 = f(*((xk,) + args))
623 grad = numpy.zeros((len(xk),), float)
624 ei = numpy.zeros((len(xk),), float)
/Users/apple/anaconda/lib/python3.6/site-packages/scipy/optimize/optimize.py in function_wrapper(*wrapper_args)
290 def function_wrapper(*wrapper_args):
291 ncalls[0] += 1
--> 292 return function(*(wrapper_args + args))
293
294 return ncalls, function_wrapper
<ipython-input-171-6fc8b0780a49> in lr_cost(X, y, theta, lambda_)
18 def lr_cost(X, y, theta, lambda_):
19 m = X.shape[0]
---> 20 cost = np.sum(np.power(((X * theta.T) - y), 2))/2/m + np.sum(lambda_/2/m * np.power(theta[:, 0:-1], 2))
21 return cost
22
ValueError: operands could not be broadcast together with shapes (2,) (1,12)
lr_costとlr_grad仕事に:
J = lr_cost(np.insert(X, 0 , 1, axis=1), y, theta, 1)
grad = lr_grad(np.insert(X, 0 , 1, axis=1), y, theta, 1)
J, grad
出力:
(303.99319222026429, matrix([[ -15.30301567, 598.25074417]]))
はこれをデバッグする良い方法はありますか?
可能であれば、私はOctaveセッションとIpythonセッションの両方を開始し、計算の重要な部分をステップ実行して、互換性のある結果が得られることを確認します。 – hpaulj