
Cost function does not decrease in my gradient descent implementation

I am implementing multiple linear regression from scratch in Python. The cost increases very rapidly after every epoch and eventually overflows. What is going on? Is there a logical or systematic error? Here is the code:

import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 

class LinearRegression: 

    def __init__(self): 
     #initialize parameters with none values 
     self.cost_history = None 
     self.std_dev = None 
     self.mean = None 
     self.weights = None 

    def set_mean_and_std_dev(self, X_data, axis=0): 
     #mean and std_deviation of training_data 
     self.std_dev = np.array(np.std(X_data, axis = axis)) 
     self.mean = np.array(np.mean(X_data, axis = axis)) 
     print("Mean : ", self.mean) 
     print("Standard deviation : ", self.std_dev) 
     return 

    def normalize(self, X_data): 
     #normalizing the data 
     X_data = (X_data - self.mean)/(self.std_dev+0.00001) 
     return 

    def cost(self, X, y): 
     #squared error based cost 
     return np.sum((np.dot(X, self.weights.T)-y)**2) 

    def get_gradient(self, X, y, pred_y, learning_rate): 
     grad = np.zeros(shape=(X.shape[1],), dtype='float64') 
     #print("X.shape : ", X.shape, " grad.shape : ", grad.shape, "pred_y.shape", pred_y.shape, "y.shape : ", y.shape) 

     for ix in range(X.shape[0]): 
      #for each example in X 
      x_example = X[ix, :] 
      error = (pred_y[ix] - y[ix]) 

      for jx in range(grad.shape[0]): 
       #for each feature of X 
       grad[jx] = grad[jx] + x_example[jx]*error 
     for ix in range(grad.shape[0]): 
      #divide by the number of examples 
      grad[jx] = grad[jx]*learning_rate*(1.0/X.shape[0]) 
     return grad 


    def gradient_descent(self, X, y, learning_rate=0.001, num_iterations=100): 

     self.weights = np.zeros(shape=(X.shape[1],), dtype='float64') 
     for ix in range(X.shape[1]): 
      #initialize weights with random values 
      self.weights[ix] = np.random.rand()*100 

     #initialize cost history 
     self.cost_history = [] 

     for ix in range(num_iterations): 
      pred_y = np.dot(X, self.weights.T) 
      gradient = self.get_gradient(X=X, y=y, pred_y=pred_y, learning_rate=learning_rate) 
      self.weights = self.weights - gradient 
      cost = self.cost(X, y) 
      self.cost_history.append(cost) 

      if ix%10 == 0: 
       print("(learning_rate/(X.shape[0]))*gradient) : ", gradient)    
       print("Cost at ", ix, " epoch is : ", cost) 

    def predict(self, X): 
     return np.dot(X, self.weights.T) 

    def get_weights(self): 
     return self.weights 

#create a linear regression object 
lr = LinearRegression() 


# # Some random function generator 
# y = 2.5 + 2*X1 - X2 - 3*X3 + 1.23*X4 
def generate_data(X): 
    y = np.zeros(shape=(X.shape[0],), dtype='float64') 
    for ix in range(X.shape[0]): 
     y[ix] = 1.23 + 2.5*X[ix, 0] + 2*X[ix, 1] - X[ix, 2] - 3*X[ix, 3] 
    return y 


X = np.zeros(shape=(300, 5), dtype='float64') 
data_gen = [[np.random.rand()*107 for jx in range(4)] for ix in range(300)] 
X[:, 1:] = np.array(data_gen, dtype='float64') 

y = generate_data(X[:, 1:]) 

lr.set_mean_and_std_dev(X) 
lr.normalize(X) 

X[:, 0] = 1 
print(X.shape, y.shape, X.dtype, y.dtype) 
print(X[0], y[0]) 



X_train, X_test, y_train, y_test = X[:200], X[200:], y[:200], y[200:] 
print(y_test) 


lr.gradient_descent(X_train, y_train, 0.01, 500) 
pred_y = lr.predict(X_test) 

Have you tried a smaller learning rate? –


Yes, I also tried 0.00001 and even 0.0000001, but it keeps shooting up. –


I think the best approach is to take a 2D dataset (300 x 2) and track the values of the gradient, the weights, and the cost. That should help you single out the error. It also helps to work through an example you can solve on paper. –
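A minimal sketch of that debugging approach (the two-column setup, step count, and learning rate here are made-up for illustration):

import numpy as np 

#hypothetical 2-column dataset: a bias column plus one feature 
X_small = np.ones((300, 2)) 
X_small[:, 1] = np.random.rand(300) 
y_small = 1.0 + 2.0*X_small[:, 1] 

w = np.zeros(2) 
for it in range(5): 
    pred = X_small.dot(w) 
    #average gradient of the squared error over all examples 
    grad = X_small.T.dot(pred - y_small)/X_small.shape[0] 
    w = w - 0.1*grad 
    cost = np.sum((X_small.dot(w) - y_small)**2) 
    print(it, " grad: ", grad, " w: ", w, " cost: ", cost) 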

Answer


There is an error in the function get_gradient: the index jx is used inside the for ix loop:

for ix in range(grad.shape[0]): 
    #divide by the number of examples 
    grad[jx] = grad[jx]*learning_rate*(1.0/X.shape[0]) 
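A minimal in-place fix is simply to index that loop by ix:

for ix in range(grad.shape[0]): 
    #divide by the number of examples 
    grad[ix] = grad[ix]*learning_rate*(1.0/X.shape[0]) 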

You also write too many for loops. Instead, you can try this as the get_gradient function:

def get_gradient(self, X, y, pred_y, learning_rate): 
    errors = (pred_y - y) 
    grad = X.T.dot(errors) 
    #divide by the number of examples 
    grad = grad*learning_rate/X.shape[0] 
    return grad 
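This computes the same update as the (corrected) loop version. A quick self-contained check, using small made-up arrays:

import numpy as np 

X = np.array([[1.0, 2.0], [1.0, 3.0], [1.0, 5.0]]) 
y = np.array([4.0, 6.0, 10.0]) 
w = np.array([0.5, 1.0]) 
learning_rate = 0.01 

pred_y = X.dot(w) 
#vectorized: X.T times the error vector, then average and scale 
vectorized = X.T.dot(pred_y - y)*learning_rate/X.shape[0] 

#looped: accumulate each example's contribution, then average and scale 
looped = np.zeros(X.shape[1]) 
for ix in range(X.shape[0]): 
    looped = looped + X[ix]*(pred_y[ix] - y[ix]) 
looped = looped*learning_rate/X.shape[0] 

print(np.allclose(vectorized, looped)) #True 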

Other issues:

  1. The initial weight values are too large:

    self.weights = np.zeros(shape=(X.shape[1],), dtype='float64') 
    for ix in range(X.shape[1]): 
        #initialize weights with random values 
        self.weights[ix] = np.random.rand()*100 
    

    You can use smaller initial values, and with only one line: self.weights = np.random.rand(X.shape[1])

  2. The learning rate is too large. Try: lr.gradient_descent(X_train, y_train, 0.00001, 3000)

  3. It seems you wanted to do something with the two functions set_mean_and_std_dev and normalize, but in this code they did nothing (see the sketch after this list).
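One hedged sketch of what that could look like. Note that the original normalize reassigns a local variable and returns nothing, so the caller's X is never changed; computing the statistics on the feature columns only (excluding the bias column) is my assumption here, not something the original code did:

def normalize(self, X_data): 
    #return the normalized copy instead of returning None 
    return (X_data - self.mean)/(self.std_dev + 0.00001) 

#usage: fit statistics on the feature columns, then overwrite them 
lr.set_mean_and_std_dev(X[:, 1:]) 
X[:, 1:] = lr.normalize(X[:, 1:]) 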

The final code:

import numpy as np 

class LinearRegression: 

    def __init__(self): 
     #initialize parameters with none values 
     self.cost_history = None 
     self.std_dev = None 
     self.mean = None 
     self.weights = None 

    def cost(self, X, y): 
     #squared error based cost 
     return np.sum((self.predict(X)-y)**2) 

    def get_gradient(self, X, y, pred_y, learning_rate): 
     errors = (pred_y - y) 
     grad = X.T.dot(errors) 
     #divide by the number of examples 
     grad = grad*learning_rate/X.shape[0] 
     return grad 


    def gradient_descent(self, X, y, 
         learning_rate=0.001, 
         num_iterations=100): 
     #initialize weights with random values 
     self.weights = np.random.rand(X.shape[1]) 
     #initialize cost history 
     self.cost_history = [] 

     for ix in range(num_iterations): 
      pred_y = self.predict(X) 
      gradient = self.get_gradient(X=X, y=y, pred_y=pred_y, learning_rate=learning_rate) 
      self.weights = self.weights - gradient 
      cost = self.cost(X, y) 
      self.cost_history.append(cost) 

      if ix%10 == 0: 
       print("(learning_rate/(X.shape[0]))*gradient) : ", gradient) 
       print("Cost at ", ix, " epoch is : ", cost) 

    def predict(self, X): 
     return np.dot(X, self.weights) 


#create a linear regression object 
lr = LinearRegression() 

# # Some random function generator 
# y = 2.5 + 2*X1 - X2 - 3*X3 + 1.23*X4 
def generate_data(X): 
    y = np.zeros(shape=(X.shape[0],), dtype='float64') 
    for ix in range(X.shape[0]): 
     y[ix] = 1.23 + 2.5*X[ix, 0] + 2*X[ix, 1] - X[ix, 2] - 3*X[ix, 3] 
    return y 


X = np.zeros(shape=(300, 5), dtype='float64') 
data_gen = [[np.random.rand()*107 for jx in range(4)] for ix in range(300)] 
X[:, 1:] = np.array(data_gen, dtype='float64') 

y = generate_data(X[:, 1:]) 

X[:, 0] = 1 
print(X.shape, y.shape, X.dtype, y.dtype) 
print(X[0], y[0]) 

X_train, X_test, y_train, y_test = X[:200], X[200:], y[:200], y[200:] 
print(y_test) 

lr.gradient_descent(X_train, y_train, 0.00001, 3000) 
pred_y = lr.predict(X_test) 
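As a quick sanity check (not part of the original answer), you could compare the predictions against the held-out targets and inspect the learned weights:

#mean squared error on the test split 
mse = np.mean((pred_y - y_test)**2) 
print("Test MSE : ", mse) 
#the data came from y = 1.23 + 2.5*X1 + 2*X2 - X3 - 3*X4, 
#so the weights should move toward [1.23, 2.5, 2, -1, -3] 
print("Weights : ", lr.weights) 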