マルチレイヤパーセプトロンウェイトは変更されません

私はMachine Learningについてはかなり新しく、Machine Learning an algorithmic perspectiveで始まりました。私は本のウェブサイトで与えられたコードを微調整することによって、悪意のあるプログラムを悪意のあるプログラムから識別するロジスティック分類器を作ろうとしています。しかし、隠れ層および出力層に関連する重みは、100000エポック後も変化しない。マルチレイヤパーセプトロンウェイトは変更されません

完全なデータセットと部分的なバージョンのアルゴリズムを実行しようとしましたが、まだ運がありません。ここで

は、ここに私のMLPクラス

import numpy as np 
class mlp: 

def __init__(self, inputs, targets, nhidden, beta=1, momentum=0.9, outtype='logistic'): 
    """ Constructor """ 
    # Set up network size 
    self.nin = np.shape(inputs)[1] 
    self.nout = np.shape(targets)[1] 
    self.ndata = np.shape(inputs)[0] 
    self.nhidden = nhidden 

    self.beta = beta 
    self.momentum = momentum 
    self.outtype = outtype 

    # Initialise network 
    self.weights1 = (np.zeros((self.nin + 1, self.nhidden), dtype=float) - 0.5) * 2/np.sqrt(self.nin) 
    self.weights2 = (np.zeros((self.nhidden + 1, self.nout), dtype=float) - 0.5) * 2/np.sqrt(self.nhidden) 

def earlystopping(self, inputs, targets, valid, validtargets, eta, niterations=100): 

    valid = np.concatenate((valid, -np.ones((np.shape(valid)[0], 1))), axis=1) 

    old_val_error1 = 100002 
    old_val_error2 = 100001 
    new_val_error = 100000 

    count = 0 
    while (((old_val_error1 - new_val_error) > 0.001) or ((old_val_error2 - old_val_error1) > 0.001)): 
     count += 1 
     print 
     count 
     self.mlptrain(inputs, targets, eta, niterations) 
     old_val_error2 = old_val_error1 
     old_val_error1 = new_val_error 
     validout = self.mlpfwd(valid) 
     new_val_error = 0.5 * np.sum((validtargets - validout) ** 2) 

    print("Stopped", new_val_error, old_val_error1, old_val_error2) 
    return new_val_error 

def mlptrain(self, inputs, targets, eta, niterations): 
    """ Train the thing """ 
    # Add the inputs that match the bias node 
    inputs = np.concatenate((inputs, -np.ones((self.ndata, 1))), axis=1) 
    change = range(self.ndata) 



    print(self.weights2) 
    updatew1 = np.zeros((np.shape(self.weights1))) 
    updatew2 = np.zeros((np.shape(self.weights2))) 

    for n in range(niterations): 

     self.outputs = self.mlpfwd(inputs) 

     #error = 0.5 * np.sum((self.outputs - targets) ** 2) 
     if (np.mod(n, 100) == 0): 
      print ("Iteration: ", n, " Weight2: ", self.weights2) 


      # Different types of output neurons 
     if self.outtype == 'linear': 
      deltao = (self.outputs - targets)/self.ndata 
     elif self.outtype == 'logistic': 
      deltao = self.beta * (self.outputs - targets) * self.outputs * (1.0 - self.outputs) 
     elif self.outtype == 'softmax': 
      deltao = (self.outputs - targets) * (self.outputs * (-self.outputs) + self.outputs)/self.ndata 
     else: 
      print("error") 

     deltah = self.hidden * self.beta * (1.0 - self.hidden) * (np.dot(deltao, np.transpose(self.weights2))) 

     updatew1 = eta * (np.dot(np.transpose(inputs), deltah[:, :-1])) + self.momentum * updatew1 
     updatew2 = eta * (np.dot(np.transpose(self.hidden), deltao)) + self.momentum * updatew2 
     self.weights1 -= updatew1 
     self.weights2 -= updatew2 

     # Randomise order of inputs (not necessary for matrix-based calculation) 
     # np.random.shuffle(change) 
     # inputs = inputs[change,:] 
     # targets = targets[change,:] 


    print(self.weights2) 

def mlpfwd(self, inputs): 
    """ Run the network forward """ 

    self.hidden = np.dot(inputs, self.weights1); 
    self.hidden = 1.0/(1.0 + np.exp(-self.beta * self.hidden)) 
    self.hidden = np.concatenate((self.hidden, -np.ones((np.shape(inputs)[0], 1))), axis=1) 

    outputs = np.dot(self.hidden, self.weights2); 

    # Different types of output neurons 
    if self.outtype == 'linear': 
     return outputs 
    elif self.outtype == 'logistic': 
     return 1.0/(1.0 + np.exp(-self.beta * outputs)) 
    elif self.outtype == 'softmax': 
     normalisers = np.sum(np.exp(outputs), axis=1) * np.ones((1, np.shape(outputs)[0])) 
     return np.transpose(np.transpose(np.exp(outputs))/normalisers) 
    else: 
     print("error") 

def confmat(self, inputs, targets): 
    """Confusion matrix""" 

    # Add the inputs that match the bias node 
    inputs = np.concatenate((inputs, -np.ones((np.shape(inputs)[0], 1))), axis=1) 
    outputs = self.mlpfwd(inputs) 


    nclasses = np.shape(targets)[1] 

    if nclasses == 1: 
     nclasses = 2 
     outputs = np.where(outputs > 0.5, 1, 0) 
    else: 
     # 1-of-N encoding 
     outputs = np.argmax(outputs, 1) 
     targets = np.argmax(targets, 1) 

    cm = np.zeros((nclasses, nclasses)) 
    for i in range(nclasses): 
     for j in range(nclasses): 
      cm[i, j] = np.sum(np.where(outputs == j, 1, 0) * np.where(targets == i, 1, 0)) 

    print(outputs) 
    print(targets) 

    print("Confusion matrix is:") 
    print(cm) 
    print("Percentage Correct: ", np.trace(cm)/np.sum(cm) * 100)

は、各ベクトルは、7次元と最後のエントリを持つデータに

import mlp 
import numpy as np 

apk_train_data = np.array([ 
    [4, 1, 6, 29, 0, 3711, 1423906, 0], 
    [20, 1, 5, 24, 0, 4082, 501440, 0], 
    [3, 0, 1, 6, 0, 5961, 2426358, 0], 
    [0, 0, 2, 27, 0, 6074, 28762, 0], 
    [12, 1, 3, 17, 0, 4066, 505, 0], 
    [1, 0, 2, 5, 0, 1284, 38504, 0], 
    [2, 0, 2, 10, 0, 2421, 5827165, 0], 
    [5, 0, 17, 97, 0, 25095, 7429, 0], 
    [1, 1, 3, 22, 6, 4539, 9100705, 0], 
    [2, 0, 4, 15, 0, 2054, 264563, 0], 
    [3, 1, 6, 19, 0, 3562, 978171, 0], 
    [8, 0, 5, 12, 3, 1741, 1351990, 0], 
    [9, 0, 5, 12, 2, 1660, 2022743, 0], 
    [9, 0, 5, 12, 2, 1664, 2022743, 0], 
    [10, 4, 11, 70, 8, 43944, 51488321, 1], 
    [6, 0, 3, 18, 0, 8511, 19984102, 1], 
    [11, 2, 6, 44, 0, 61398, 32139, 1], 
    [0, 0, 0, 0, 0, 1008, 23872, 1], 
    [7, 1, 1, 16, 3, 46792, 94818, 1], 
    [3, 2, 1, 13, 2, 8263, 208820, 1], 
    [0, 0, 0, 2, 0, 2749, 3926, 1], 
    [10, 0, 1, 9, 0, 5220, 2275848, 1], 
    [1, 1, 3, 34, 6, 50030, 814322, 1], 
    [2, 2, 4, 48, 7, 86406, 12895, 1], 
    [0, 1, 5, 45, 2, 63060, 803121, 1], 
    [1, 0, 2, 11, 7, 7602, 1557, 1], 
    [3, 0, 1, 15, 3, 20813, 218352, 1] 
]) 
apk_test_data = np.array([ 
    [0, 0, 1, 9, 0, 4317, 118082, 0], 
    [8, 0, 5, 12, 3, 1742, 1351990, 0], 
    [8, 0, 5, 12, 3, 1744, 1351990, 0], 
    [0, 0, 1, 11, 2, 17630, 326164, 1], 
    [10, 2, 6, 45, 7, 22668, 30257520, 1], 
    [1, 0, 1, 8, 0, 9317, 33000349, 1], 
    [3, 0, 1, 15, 3, 20813, 218352, 1] 
]) 

p = mlp.mlp(apk_train_data[:, 0:7], apk_train_data[:, 7:], 9) 
p.mlptrain(apk_train_data[:, 0:7], apk_train_data[:, 7:], 0.25, 100000) 
p.confmat(apk_test_data[:, 0:7], apk_test_data[:, 7:])

を提供し、私の呼び出し元のコードである。ここ

ターゲットであるがいっぱいですデータセットを含むテキストファイル https://drive.google.com/open?id=1q_aGNgHxTBh_mmVAzVXKBa27NTJKeKV8

どうか私は間違っていると教えてください。同じことをするライブラリを使用するのが簡単な場合は、同じことをお勧めします。

出典

2017-11-30 James Howlett

、あなたが0にすべてのあなたの重みを初期化しますが、ランダムに代わりに重みを初期化しようとすることができます：私は反対の極端を使って説明しましょうか？ – Uvar

あなたは天才です！ランダムな値にウェイトを初期化すると、ウェイトがトレーニングに比べて変化します。しかし、この背後にある論理を説明できますか？理論的には重みを0に初期化することもうまくいくはずです。また、私はまだ正確な精度を得ることができません。それを改善する方法を提案してください。 –

これを改善する簡単な方法は、 'η'を減らすことです。もう1つは、データをネットワークに供給する前にデータを標準化することです。 :) – Uvar

コメントに記載されているように、ネットワークの重みをランダムに初期化すると、ネットワークトレインが作成されるはずです。

# Initialise network 
    self.weights1 = (np.random.rand(self.nin+1,self.nhidden)-0.5)*2/np.sqrt(self.nin) 
    self.weights2 = (np.random.rand(self.nhidden+1,self.nout)-0.5)*2/np.sqrt(self.nhidden)

次に、あなたのデータから私の観察は、プロパティが全く匹敵しないということです。これは、ネットワークの勾配の更新が1つの機能によって支配されることを意味します。これを解決するには、データを標準化する方法があります。

from sklearn.preprocessing import StandardScaler 
for i in range(apk_train_data.shape[1]-1): 
    scaler = StandardScaler().fit(apk_train_data[:,i].copy()) 
    apk_train_data[:,i] = scaler.transform(apk_train_data[:,i].copy()) 
    apk_test_data[:,i] = scaler.transform(apk_test_data[:,i].copy())

そして最後にではなく、少なくとも、etaは0.25も持つことはあまりにも大きいです。私の知る限り大きなread..butつまり

p.mlptrain(apk_train_data[:, 0:7], apk_train_data[:, 7:], 0.0001, 100000) 
p.confmat(apk_test_data[:, 0:7], apk_test_data[:, 7:]) 
# >> Percentage Correct: 71.4285714286 
p.confmat(apk_train_data[:,0:7], apk_train_data[:,7:]) 
# >> Percentage Correct: 88.8888888889

出典

2017-11-30 14:40:20 Uvar

マルチレイヤパーセプトロンウェイトは変更されません

答えて

関連する問題