ディープニューラルネットワークのトレーニング：なぜネットワークのトレーニングが収束しないのですか？

MatConvNet の DagNN を使用しています。AlexNet アーキテクチャを使用しています。私のアーキテクチャ（最後の数層を含む）は

[![net = dagnn.DagNN() ; 
    % Script overview: populates the DagNN object created above with an
    % AlexNet-style stack of layers, loads an image database from a .mat file,
    % fills in training options, and passes everything to cnn_train_dag.
    imdb_32 =load('imdb_all_32_pd_norm.mat'); 
    % Unwrap the 'imdb' field of the loaded struct.
    imdb_32=imdb_32.imdb; 
    % some common options 
    opts.train.batchSize = 100; 
    opts.train.numEpochs = 100 ; 
    opts.train.continue = true ; 
    % Empty GPU list; presumably selects CPU training -- confirm against cnn_train_dag.
    opts.train.gpus = \[\] ; 
    % Single fixed learning rate of 0.2; the trailing comment keeps earlier
    % alternatives (per-epoch schedules and 0.002) that were tried.
    opts.train.learningRate = 0.2;%\[0.1 * ones(1,30), 0.01*ones(1,30), 0.001*ones(1,30)\] ;%0.002;%\[2e-1*ones(1, 10), 2e-2*ones(1, 5)\]; 
    opts.train.momentum = 0.9; 
    % NOTE(review): expDir is not defined anywhere in this snippet; it must
    % already exist in the workspace.
    opts.train.expDir = expDir; 
    opts.train.numSubBatches = 1; 

    % bopts is forwarded to getBatch through the function handle in the final call.
    bopts.useGpu =0;%numel(opts.train.gpus) > 0 ; 

    %% NET 
    % Each addLayer call below gives: layer name, layer object, input variable
    % names, output variable names, and (for Conv layers) parameter names.
    % Stage 1: 11x11 conv (stride 4, pad 20 on every side) -> ReLU -> LRN -> 3x3 max pool (stride 2).
    net.addLayer('conv1', dagnn.Conv('size', \[11 11 3 96\], 'hasBias', true, 'stride', \[4, 4\], 'pad', \[20 20 20 20\]), {'input'}, {'conv1'}, {'conv1f' 'conv1b'}); 
    net.addLayer('relu1', dagnn.ReLU(), {'conv1'}, {'relu1'}, {}); 
    net.addLayer('lrn1', dagnn.LRN('param', \[5 1 2.0000e-05 0.7500\]), {'relu1'}, {'lrn1'}, {}); 
    net.addLayer('pool1', dagnn.Pooling('method', 'max', 'poolSize', \[3, 3\], 'stride', \[2 2\], 'pad', \[0 0 0 0\]), {'lrn1'}, {'pool1'}, {}); 

    % Stage 2: 5x5 conv -> ReLU -> LRN -> 3x3 max pool -> dropout.
    % NOTE(review): the third entry of 'size' is 48 while conv1 declares 96 in
    % its fourth entry; presumably this relies on filter groups -- confirm.
    net.addLayer('conv2', dagnn.Conv('size', \[5 5 48 256\], 'hasBias', true, 'stride', \[1, 1\], 'pad', \[2 2 2 2\]), {'pool1'}, {'conv2'}, {'conv2f' 'conv2b'}); 
    net.addLayer('relu2', dagnn.ReLU(), {'conv2'}, {'relu2'}, {}); 
    net.addLayer('lrn2', dagnn.LRN('param', \[5 1 2.0000e-05 0.7500\]), {'relu2'}, {'lrn2'}, {}); 
    net.addLayer('pool2', dagnn.Pooling('method', 'max', 'poolSize', \[3, 3\], 'stride', \[2 2\], 'pad', \[0 0 0 0\]), {'lrn2'}, {'pool2'}, {}); 
    % NOTE(review): dropout rate is 0.7 here but 0.5 at drop5 -- confirm this is intended.
    net.addLayer('drop2',dagnn.DropOut('rate',0.7),{'pool2'},{'drop2'}); 

    % Stage 3: 3x3 conv -> ReLU.
    net.addLayer('conv3', dagnn.Conv('size', \[3 3 256 384\], 'hasBias', true, 'stride', \[1, 1\], 'pad', \[1 1 1 1\]), {'drop2'}, {'conv3'}, {'conv3f' 'conv3b'}); 
    net.addLayer('relu3', dagnn.ReLU(), {'conv3'}, {'relu3'}, {}); 

    % Stage 4: 3x3 conv -> ReLU. Third 'size' entry (192) is half of conv3's
    % fourth entry (384); presumably filter groups again -- confirm.
    net.addLayer('conv4', dagnn.Conv('size', \[3 3 192 384\], 'hasBias', true, 'stride', \[1, 1\], 'pad', \[1 1 1 1\]), {'relu3'}, {'conv4'}, {'conv4f' 'conv4b'}); 
    net.addLayer('relu4', dagnn.ReLU(), {'conv4'}, {'relu4'}, {}); 

    % Stage 5: 3x3 conv -> ReLU -> 3x3 max pool (stride 2) -> dropout (rate 0.5).
    net.addLayer('conv5', dagnn.Conv('size', \[3 3 192 256\], 'hasBias', true, 'stride', \[1, 1\], 'pad', \[1 1 1 1\]), {'relu4'}, {'conv5'}, {'conv5f' 'conv5b'}); 
    net.addLayer('relu5', dagnn.ReLU(), {'conv5'}, {'relu5'}, {}); 
    net.addLayer('pool5', dagnn.Pooling('method', 'max', 'poolSize', \[3 3\], 'stride', \[2 2\], 'pad', \[0 0 0 0\]), {'relu5'}, {'pool5'}, {}); 
    net.addLayer('drop5',dagnn.DropOut('rate',0.5),{'pool5'},{'drop5'}); 

    % "Fully connected" layers are written as 1x1 convolutions.
    % NOTE(review): a 1x1 filter only acts as a fully connected layer if the
    % drop5 output is 1x1 spatially -- not verifiable from this snippet.
    net.addLayer('fc6', dagnn.Conv('size', \[1 1 256 4096\], 'hasBias', true, 'stride', \[1, 1\], 'pad', \[0 0 0 0\]), {'drop5'}, {'fc6'}, {'conv6f' 'conv6b'}); 
    net.addLayer('relu6', dagnn.ReLU(), {'fc6'}, {'relu6'}, {}); 

    net.addLayer('fc7', dagnn.Conv('size', \[1 1 4096 4096\], 'hasBias', true, 'stride', \[1, 1\], 'pad', \[0 0 0 0\]), {'relu6'}, {'fc7'}, {'conv7f' 'conv7b'}); 
    net.addLayer('relu7', dagnn.ReLU(), {'fc7'}, {'relu7'}, {}); 
    % Largest label value in the database.
    % NOTE(review): classLabels is never used in the visible code; the
    % classifier below hard-codes a single output instead.
    classLabels=max(unique(imdb_32.images.labels)); 
    % NOTE(review): the classifier's 'size' ends in 1 (a single output) and its
    % output 'prediction' feeds SoftMax. If SoftMax normalizes across that
    % dimension, 'prob' would be constant and carry no gradient -- confirm;
    % this is a likely reason for the non-converging objective.
    net.addLayer('classifier', dagnn.Conv('size', \[1 1 4096 1\], 'hasBias', true, 'stride', \[1, 1\], 'pad', \[0 0 0 0\]), {'relu7'}, {'prediction'}, {'conv8f' 'conv8b'}); 
    net.addLayer('prob', dagnn.SoftMax(), {'prediction'}, {'prob'}, {}); 
    % Both the training objective and the reported error read 'prob' and 'label'.
    % dagnn.L2Loss is not defined in this snippet (presumably a custom layer -- confirm).
    net.addLayer('l2_loss', dagnn.L2Loss(), {'prob', 'label'}, {'objective'}); 
    net.addLayer('error', dagnn.Loss('loss', 'classerror'), {'prob','label'}, 'error') ; 

    % Augmentation metadata stored on the network. colorDeviation is a 3x3 zero
    % matrix, so jitterBrightness below also evaluates to all zeros.
    opts.colorDeviation = zeros(3) ; 
    net.meta.augmentation.jitterFlip = true ; 
    net.meta.augmentation.jitterLocation = true ; 
    % NOTE(review): jitterFlip is assigned a second time here with the same value.
    net.meta.augmentation.jitterFlip = true ; 
    net.meta.augmentation.jitterBrightness = double(0.1 * opts.colorDeviation) ; 
    net.meta.augmentation.jitterAspect = \[3/4, 4/3\] ; 
    net.meta.augmentation.jitterScale = \[0.4, 1.1\] ; 
    net.meta.augmentation.jitterSaturation = 0.4 ; 
    net.meta.augmentation.jitterContrast = 0.4 ; 
    % net.meta.augmentation.jitterAspect = \[2/3, 3/2\] ; 
    % Record the dataset mean image from the imdb as normalization metadata.
    net.meta.normalization.averageImage=imdb_32.images.data_mean; 
    % initNet_He is not shown; presumably He-style weight initialization -- confirm.
    initNet_He(net); 

    % Train: the batch function forwards bopts to getBatch (not shown); images
    % whose 'set' field equals 2 are passed as the 'val' indices.
    info = cnn_train_dag(net, imdb_32, @(i,b) getBatch(bopts,i,b), opts.train, 'val', find(imdb_32.images.set == 2)) ;][1]][1]

であり、各エポックの結果は添付ファイルに示されています。なぜ誤差と目的関数は収束しないのでしょうか？回帰損失にはMSE損失を使用しています。

出典

2017-05-13 h612

添付ファイルはどこにありますか？たとえば、モーメンタムを小さくしてみてください –

申し訳ありません - 今は添付しました。 – h612

は0.5

出典

2017-05-13 18:48:43

モーメンタムを変更しても変化はありません。ところで、なぜモーメンタムを小さくするのですか？ – h612

ディープニューラルネットワークのトレーニング：なぜネットワークのトレーニングが収束しないのですか？

答えて

関連する問題