
Several layers of my net are loaded from a pretrained model. I want to fix their parameters and train only the other layers. How can I prevent weight updates in Caffe?

I followed this page and set lr_mult and decay_mult to 0, added propagate_down: false, and even set base_lr: 0 and weight_decay: 0 in the solver. However, the test loss (computed over all test images at every test) still changes slowly at each iteration, and after a few thousand iterations the accuracy drops to 0 (from the 80% it has right after the pretrained model is loaded).

Here is a simplified two-layer example in which I want to freeze every layer. The weights are only initialized and all the parameters above are set to 0, yet the loss still keeps changing once training starts...

layer {
  name: "data"
  type: "ImageData"
  top: "data"
  top: "label"
  include {
    phase: TRAIN
  }
  transform_param {
    scale: 0.017
    mirror: true
    crop_size: 32
    mean_value: 115
    mean_value: 126
    mean_value: 130
    color: true
    contrast: true
    brightness: true
  }
  image_data_param {
    source: "/data/zhuhao5/data/cifar100/cifar100_train_replicate.txt"
    batch_size: 64
    shuffle: true
    #pair_size: 3
  }
}
layer {
  name: "data"
  type: "ImageData"
  top: "data"
  top: "label"
  include {
    phase: TEST
  }
  transform_param {
    scale: 0.017
    mirror: false
    crop_size: 32
    mean_value: 115
    mean_value: 126
    mean_value: 130
  }
  image_data_param {
    source: "/data/zhuhao5/data/cifar100/cifar100_test.txt"
    batch_size: 100
    shuffle: false
  }
}
#-------------- TEACHER --------------------
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  propagate_down: false
  top: "conv1"
  param {
    lr_mult: 0
    decay_mult: 0
  }
  convolution_param {
    num_output: 16
    bias_term: false
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "msra"
    }
  }
}
layer {
  name: "res2_1a_1_bn"
  type: "BatchNorm"
  bottom: "conv1"
  propagate_down: false
  top: "res2_1a_1_bn"
  param {
    lr_mult: 0
    decay_mult: 0
  }
  param {
    lr_mult: 0
    decay_mult: 0
  }
}
layer {
  name: "res2_1a_1_scale"
  type: "Scale"
  bottom: "res2_1a_1_bn"
  propagate_down: false
  top: "res2_1a_1_bn"
  param {
    lr_mult: 0
    decay_mult: 0
  }
  scale_param {
    bias_term: true
  }
}
layer {
  name: "res2_1a_1_relu"
  type: "ReLU"
  bottom: "res2_1a_1_bn"
  propagate_down: false
  top: "res2_1a_1_bn"
}
layer {
  name: "pool_5"
  type: "Pooling"
  bottom: "res2_1a_1_bn"
  propagate_down: false
  top: "pool_5"
  pooling_param {
    pool: AVE
    global_pooling: true
  }
}
layer {
  name: "fc100"
  type: "InnerProduct"
  bottom: "pool_5"
  propagate_down: false
  top: "fc100"
  param {
    lr_mult: 0
    decay_mult: 0
  }
  param {
    lr_mult: 0
    decay_mult: 0
  }
  inner_product_param {
    num_output: 100
    weight_filler {
      type: "msra"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
#---------------------------------
layer {
  name: "tea_soft_loss"
  type: "SoftmaxWithLoss"
  bottom: "fc100"
  bottom: "label"
  propagate_down: false
  propagate_down: false
  top: "tea_soft_loss"
  loss_weight: 0
}

##----------- ACCURACY----------------

layer {
  name: "teacher_accuracy"
  type: "Accuracy"
  bottom: "fc100"
  bottom: "label"
  top: "teacher_accuracy"
  accuracy_param {
    top_k: 1
  }
}

Here is the solver:

test_iter: 100
test_interval: 10

base_lr: 0
momentum: 0
weight_decay: 0

lr_policy: "poly"
power: 1

display: 10000
max_iter: 80000
snapshot: 5000

type: "SGD"
solver_mode: GPU
random_seed: 10086

and the log:

I0829 16:31:39.363433 14986 net.cpp:200] teacher_accuracy does not need backward computation. 
I0829 16:31:39.363438 14986 net.cpp:200] tea_soft_loss does not need backward computation. 
I0829 16:31:39.363442 14986 net.cpp:200] fc100_fc100_0_split does not need backward computation. 
I0829 16:31:39.363446 14986 net.cpp:200] fc100 does not need backward computation. 
I0829 16:31:39.363451 14986 net.cpp:200] pool_5 does not need backward computation. 
I0829 16:31:39.363454 14986 net.cpp:200] res2_1a_1_relu does not need backward computation. 
I0829 16:31:39.363458 14986 net.cpp:200] res2_1a_1_scale does not need backward computation. 
I0829 16:31:39.363462 14986 net.cpp:200] res2_1a_1_bn does not need backward computation. 
I0829 16:31:39.363466 14986 net.cpp:200] conv1 does not need backward computation. 
I0829 16:31:39.363471 14986 net.cpp:200] label_data_1_split does not need backward computation. 
I0829 16:31:39.363485 14986 net.cpp:200] data does not need backward computation. 
I0829 16:31:39.363490 14986 net.cpp:242] This network produces output tea_soft_loss 
I0829 16:31:39.363494 14986 net.cpp:242] This network produces output teacher_accuracy 
I0829 16:31:39.363507 14986 net.cpp:255] Network initialization done. 
I0829 16:31:39.363559 14986 solver.cpp:56] Solver scaffolding done. 
I0829 16:31:39.363852 14986 caffe.cpp:248] Starting Optimization 
I0829 16:31:39.363862 14986 solver.cpp:272] Solving WRN_22_12_to_WRN_18_4_v5_net 
I0829 16:31:39.363865 14986 solver.cpp:273] Learning Rate Policy: poly 
I0829 16:31:39.365981 14986 solver.cpp:330] Iteration 0, Testing net (#0) 
I0829 16:31:39.366190 14986 blocking_queue.cpp:49] Waiting for data 
I0829 16:31:39.742347 14986 solver.cpp:397]  Test net output #0: tea_soft_loss = 85.9064 
I0829 16:31:39.742437 14986 solver.cpp:397]  Test net output #1: teacher_accuracy = 0.0113 
I0829 16:31:39.749806 14986 solver.cpp:218] Iteration 0 (0 iter/s, 0.385886s/10000 iters), loss = 0 
I0829 16:31:39.749862 14986 solver.cpp:237]  Train net output #0: tea_soft_loss = 4.97483 
I0829 16:31:39.749877 14986 solver.cpp:237]  Train net output #1: teacher_accuracy = 0 
I0829 16:31:39.749908 14986 sgd_solver.cpp:105] Iteration 0, lr = 0 
I0829 16:31:39.794306 14986 solver.cpp:330] Iteration 10, Testing net (#0) 
I0829 16:31:40.171447 14986 solver.cpp:397]  Test net output #0: tea_soft_loss = 4.9119 
I0829 16:31:40.171510 14986 solver.cpp:397]  Test net output #1: teacher_accuracy = 0.0115 
I0829 16:31:40.219133 14986 solver.cpp:330] Iteration 20, Testing net (#0) 
I0829 16:31:40.596911 14986 solver.cpp:397]  Test net output #0: tea_soft_loss = 4.91862 
I0829 16:31:40.596971 14986 solver.cpp:397]  Test net output #1: teacher_accuracy = 0.0116 
I0829 16:31:40.645246 14986 solver.cpp:330] Iteration 30, Testing net (#0) 
I0829 16:31:41.021711 14986 solver.cpp:397]  Test net output #0: tea_soft_loss = 4.92105 
I0829 16:31:41.021772 14986 solver.cpp:397]  Test net output #1: teacher_accuracy = 0.0117 
I0829 16:31:41.069464 14986 solver.cpp:330] Iteration 40, Testing net (#0) 
I0829 16:31:41.447345 14986 solver.cpp:397]  Test net output #0: tea_soft_loss = 4.91916 
I0829 16:31:41.447407 14986 solver.cpp:397]  Test net output #1: teacher_accuracy = 0.0117 
I0829 16:31:41.495157 14986 solver.cpp:330] Iteration 50, Testing net (#0) 
I0829 16:31:41.905607 14986 solver.cpp:397]  Test net output #0: tea_soft_loss = 4.9208 
I0829 16:31:41.905654 14986 solver.cpp:397]  Test net output #1: teacher_accuracy = 0.0117 
I0829 16:31:41.952659 14986 solver.cpp:330] Iteration 60, Testing net (#0) 
I0829 16:31:42.327942 14986 solver.cpp:397]  Test net output #0: tea_soft_loss = 4.91936 
I0829 16:31:42.328025 14986 solver.cpp:397]  Test net output #1: teacher_accuracy = 0.0117 
I0829 16:31:42.374279 14986 solver.cpp:330] Iteration 70, Testing net (#0) 
I0829 16:31:42.761359 14986 solver.cpp:397]  Test net output #0: tea_soft_loss = 4.91859 
I0829 16:31:42.761430 14986 solver.cpp:397]  Test net output #1: teacher_accuracy = 0.0117 
I0829 16:31:42.807821 14986 solver.cpp:330] Iteration 80, Testing net (#0) 
I0829 16:31:43.232321 14986 solver.cpp:397]  Test net output #0: tea_soft_loss = 4.91668 
I0829 16:31:43.232398 14986 solver.cpp:397]  Test net output #1: teacher_accuracy = 0.0117 
I0829 16:31:43.266436 14986 solver.cpp:330] Iteration 90, Testing net (#0) 
I0829 16:31:43.514633 14986 blocking_queue.cpp:49] Waiting for data 
I0829 16:31:43.638617 14986 solver.cpp:397]  Test net output #0: tea_soft_loss = 4.91836 
I0829 16:31:43.638684 14986 solver.cpp:397]  Test net output #1: teacher_accuracy = 0.0117 
I0829 16:31:43.685451 14986 solver.cpp:330] Iteration 100, Testing net (#0) 

I wonder what I missed in Caffe's update process :(
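A quick way to narrow this down is to step the solver from pycaffe and diff every parameter blob before and after a few iterations. This is only a rough sketch; the file names solver.prototxt and teacher.caffemodel are placeholders for the actual solver file and pretrained weights:

import numpy as np
import caffe

caffe.set_mode_gpu()

# Placeholder file names: substitute the real solver and pretrained weights.
solver = caffe.SGDSolver('solver.prototxt')
solver.net.copy_from('teacher.caffemodel')

# Snapshot every parameter blob, including the BatchNorm statistics blobs.
before = {name: [np.array(b.data, copy=True) for b in blobs]
          for name, blobs in solver.net.params.items()}

solver.step(10)  # run a few training iterations

# Report any blob whose values moved.
for name, blobs in solver.net.params.items():
    for i, blob in enumerate(blobs):
        diff = np.abs(blob.data - before[name][i]).max()
        if diff > 0:
            print('{} blob #{} changed by up to {:.6f}'.format(name, i, diff))

If only the BatchNorm layers' blobs show up in this report, the updates are coming from the forward pass rather than from the solver.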

1 Answer

I found the reason.

The BatchNorm layer behaves differently in the TRAIN and TEST phases, controlled by use_global_stats: when use_global_stats is false (the TRAIN-phase default), the layer recomputes mini-batch statistics and updates its stored mean/variance blobs during every forward pass, so lr_mult: 0 alone does not freeze it.

In my case, I need to set use_global_stats: true during training as well.

Also, don't forget the Scale layer.

The revised layers should be:

layer {
  name: "res2_1a_1_bn"
  type: "BatchNorm"
  bottom: "conv1"
  top: "res2_1a_1_bn"
  batch_norm_param {
    use_global_stats: true
  }
}
layer {
  name: "res2_1a_1_scale"
  type: "Scale"
  bottom: "res2_1a_1_bn"
  top: "res2_1a_1_bn"
  param {
    lr_mult: 0
    decay_mult: 0
  }
  param {
    lr_mult: 0
    decay_mult: 0
  }
  scale_param {
    bias_term: true
  }
}
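For a deeper net with many BatchNorm layers, the same change can be applied in one pass by editing the prototxt through the protobuf API instead of by hand. A minimal sketch, assuming the training definition is saved as train_val.prototxt (both file names below are placeholders):

from caffe.proto import caffe_pb2
from google.protobuf import text_format

net = caffe_pb2.NetParameter()
with open('train_val.prototxt') as f:  # placeholder input file
    text_format.Merge(f.read(), net)

# Force every BatchNorm layer to use its stored statistics in all phases.
for layer in net.layer:
    if layer.type == 'BatchNorm':
        layer.batch_norm_param.use_global_stats = True

with open('train_val_frozen.prototxt', 'w') as f:  # placeholder output file
    f.write(text_format.MessageToString(net))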