私はこのセマンティックセグメンテーションに間違っていますか？

これで、FCN32でシングルチャンネル画像のセマンティックセグメンテーションを行っていたのはかなり長い時間（約2か月）です。私はさまざまな学習率で遊んで、さらにBatchNormalizationの層を追加しました。しかし、私は何の成果を見てもうまくいっていませんでした。私は即座にここで助けを求める以外は選択肢がありませんでした。私は本当に私が間違っていることを知りません。私はこのセマンティックセグメンテーションに間違っていますか？

私はbatch.Thisなどのネットワークに列車損失曲線LR=1e-9を一枚の画像を送信し、lr_policy="fixed"ています：

私は1e-4（下図）に学習率を増加させました。損失は減っているようですが、学習曲線は正常に機能していません。

次のように、私は、元のFCNの層を減少：（1）Conv64 - ReLU - Conv64 - ReLU - MaxPool

（2）Conv128 - ReLU - Conv128 - ReLU - MaxPool

（ 3）Conv256 - ReLU - Conv256 - ReLU - MaxPool

（4）Conv4096 - ReLU - Dropout0.5

（5）Conv4096 - ReLU - Dropout0.5

（6）CONV2

（7）Deconv32x - 作物

（8）SoftmaxWithLoss

layer { 
    name: "data" 
    type: "Data" 
    top: "data" 
    include { 
    phase: TRAIN 
    } 
    transform_param { 
    mean_file: "/jjj/FCN32_mean.binaryproto" 
    } 

    data_param { 
    source: "/jjj/train_lmdb/" 
    batch_size: 1 
    backend: LMDB 
    } 
} 
layer { 
    name: "label" 
    type: "Data" 
    top: "label" 
    include { 
    phase: TRAIN 
    } 
    data_param { 
    source: "/jjj/train_label_lmdb/" 
    batch_size: 1 
    backend: LMDB 
    } 
} 
layer { 
    name: "data" 
    type: "Data" 
    top: "data" 
    include { 
    phase: TEST 
    } 
    transform_param { 
    mean_file: "/jjj/FCN32_mean.binaryproto" 
    } 
    data_param { 
    source: "/jjj/val_lmdb/" 
    batch_size: 1 
    backend: LMDB 
    } 
} 
layer { 
    name: "label" 
    type: "Data" 
    top: "label" 
    include { 
    phase: TEST 
    } 
    data_param { 
    source: "/jjj/val_label_lmdb/" 
    batch_size: 1 
    backend: LMDB 
    } 
} 

layer { 
    name: "conv1_1" 
    type: "Convolution" 
    bottom: "data" 
    top: "conv1_1" 
    param { 
    lr_mult: 1 
    decay_mult: 1 
    } 
    param { 
    lr_mult: 2 
    decay_mult: 0 
    } 
    convolution_param { 
    num_output: 64 
    pad: 100 
    kernel_size: 3 
    stride: 1 
    } 
} 
layer { 
    name: "relu1_1" 
    type: "ReLU" 
    bottom: "conv1_1" 
    top: "conv1_1" 
} 
layer { 
    name: "conv1_2" 
    type: "Convolution" 
    bottom: "conv1_1" 
    top: "conv1_2" 
    param { 
    lr_mult: 1 
    decay_mult: 1 
    } 
    param { 
    lr_mult: 2 
    decay_mult: 0 
    } 
    convolution_param { 
    num_output: 64 
    pad: 1 
    kernel_size: 3 
    stride: 1 
    } 
} 
layer { 
    name: "relu1_2" 
    type: "ReLU" 
    bottom: "conv1_2" 
    top: "conv1_2" 
} 
layer { 
    name: "pool1" 
    type: "Pooling" 
    bottom: "conv1_2" 
    top: "pool1" 
    pooling_param { 
    pool: MAX 
    kernel_size: 2 
    stride: 2 
    } 
} 
layer { 
    name: "conv2_1" 
    type: "Convolution" 
    bottom: "pool1" 
    top: "conv2_1" 
    param { 
    lr_mult: 1 
    decay_mult: 1 
    } 
    param { 
    lr_mult: 2 
    decay_mult: 0 
    } 
    convolution_param { 
    num_output: 128 
    pad: 1 
    kernel_size: 3 
    stride: 1 
    } 
} 
layer { 
    name: "relu2_1" 
    type: "ReLU" 
    bottom: "conv2_1" 
    top: "conv2_1" 
} 
layer { 
    name: "conv2_2" 
    type: "Convolution" 
    bottom: "conv2_1" 
    top: "conv2_2" 
    param { 
    lr_mult: 1 
    decay_mult: 1 
    } 
    param { 
    lr_mult: 2 
    decay_mult: 0 
    } 
    convolution_param { 
    num_output: 128 
    pad: 1 
    kernel_size: 3 
    stride: 1 
    } 
} 
layer { 
    name: "relu2_2" 
    type: "ReLU" 
    bottom: "conv2_2" 
    top: "conv2_2" 
} 
layer { 
    name: "pool2" 
    type: "Pooling" 
    bottom: "conv2_2" 
    top: "pool2" 
    pooling_param { 
    pool: MAX 
    kernel_size: 2 
    stride: 2 
    } 
} 
layer { 
    name: "conv3_1" 
    type: "Convolution" 
    bottom: "pool2" 
    top: "conv3_1" 
    param { 
    lr_mult: 1 
    decay_mult: 1 
    } 
    param { 
    lr_mult: 2 
    decay_mult: 0 
    } 
    convolution_param { 
    num_output: 256 
    pad: 1 
    kernel_size: 3 
    stride: 1 
    } 
} 
layer { 
    name: "relu3_1" 
    type: "ReLU" 
    bottom: "conv3_1" 
    top: "conv3_1" 
} 
layer { 
    name: "conv3_2" 
    type: "Convolution" 
    bottom: "conv3_1" 
    top: "conv3_2" 
    param { 
    lr_mult: 1 
    decay_mult: 1 
    } 
    param { 
    lr_mult: 2 
    decay_mult: 0 
    } 
    convolution_param { 
    num_output: 256 
    pad: 1 
    kernel_size: 3 
    stride: 1 
    } 
} 
layer { 
    name: "relu3_2" 
    type: "ReLU" 
    bottom: "conv3_2" 
    top: "conv3_2" 
} 
layer { 
    name: "pool3" 
    type: "Pooling" 
    bottom: "conv3_2" 
    top: "pool3" 
    pooling_param { 
    pool: MAX 
    kernel_size: 2 
    stride: 2 
    } 
} 
layer { 
    name: "fc6" 
    type: "Convolution" 
    bottom: "pool3" 
    top: "fc6" 
    param { 
    lr_mult: 1 
    decay_mult: 1 
    } 
    param { 
    lr_mult: 2 
    decay_mult: 0 
    } 
    convolution_param { 
    num_output: 4096 
    pad: 0 
    kernel_size: 7 
    stride: 1 
    } 
} 
layer { 
    name: "relu6" 
    type: "ReLU" 
    bottom: "fc6" 
    top: "fc6" 
} 
layer { 
    name: "drop6" 
    type: "Dropout" 
    bottom: "fc6" 
    top: "fc6" 
    dropout_param { 
    dropout_ratio: 0.5 
    } 
} 
layer { 
    name: "fc7" 
    type: "Convolution" 
    bottom: "fc6" 
    top: "fc7" 
    param { 
    lr_mult: 1 
    decay_mult: 1 
    } 
    param { 
    lr_mult: 2 
    decay_mult: 0 
    } 
    convolution_param { 
    num_output: 4096 
    pad: 0 
    kernel_size: 1 
    stride: 1 
    } 
} 
layer { 
    name: "relu7" 
    type: "ReLU" 
    bottom: "fc7" 
    top: "fc7" 
} 
layer { 
    name: "drop7" 
    type: "Dropout" 
    bottom: "fc7" 
    top: "fc7" 
    dropout_param { 
    dropout_ratio: 0.5 
    } 
} 
layer { 
    name: "score_fr" 
    type: "Convolution" 
    bottom: "fc7" 
    top: "score_fr" 
    param { 
    lr_mult: 1 
    decay_mult: 1 
    } 
    param { 
    lr_mult: 2 
    decay_mult: 0 
    } 
    convolution_param { 
    num_output: 5 #21 
    pad: 0 
    kernel_size: 1 
    weight_filler { 
     type: "xavier" 
    } 
    bias_filler { 
     type: "constant" 
    } 
    } 
} 
layer { 
    name: "upscore" 
    type: "Deconvolution" 
    bottom: "score_fr" 
    top: "upscore" 
    param { 
    lr_mult: 0 
    } 
    convolution_param { 
    num_output: 5 #21 
    bias_term: false 
    kernel_size: 64 
    stride: 32 
    group: 5 #2 
    weight_filler: { 
     type: "bilinear" 
    } 
    } 
} 
layer { 
    name: "score" 
    type: "Crop" 
    bottom: "upscore" 
    bottom: "data" 
    top: "score" 
    crop_param { 
    axis: 2 
    offset: 19 
    } 
} 
layer { 
    name: "accuracy" 
    type: "Accuracy" 
    bottom: "score" 
    bottom: "label" 
    top: "accuracy" 
    include { 
    phase: TRAIN 
    } 
} 

layer { 
    name: "accuracy" 
    type: "Accuracy" 
    bottom: "score" 
    bottom: "label" 
    top: "accuracy" 
    include { 
    phase: TEST 
    } 
} 
layer { 
    name: "loss" 
    type: "SoftmaxWithLoss" 
    bottom: "score" 
    bottom: "label" 
    top: "loss" 
    loss_param { 
    ignore_label: 255 
    normalize: true 
    } 
}

これはソルバの定義である：初め

net: "train_val.prototxt" 
#test_net: "val.prototxt" 
test_iter: 736 
# make test net, but don't invoke it from the solver itself 
test_interval: 2000 #1000000 
display: 50 
average_loss: 50 
lr_policy: "step" #"fixed" 
stepsize: 2000 #+ 
gamma: 0.1 #+ 
# lr for unnormalized softmax 
base_lr: 0.0001 
# high momentum 
momentum: 0.99 
# no gradient accumulation 
iter_size: 1 
max_iter: 10000 
weight_decay: 0.0005 
snapshot: 2000 
snapshot_prefix: "snapshot/NET1" 
test_initialization: false 
solver_mode: GPU

損失は減少し始めていますが、何度か反復した後でも、良い学習行動を示していません：

私は深い学習の初心者であり、caffeです。私は本当になぜこれが起こるのか分からない。専門知識を持っている人には本当に感謝します。モデルの定義を見てください。あなたが私を助けてくれたらとても感謝しています。

出典

2017-03-03 S.EB

事前に重み付けを開始していますか、ゼロからランダムにネットワークをトレーニングしていますか？ –

実際にはゼロから練習しています。助けてくれてありがとう –

問題は、最初から訓練していることです。 FCN paperを読む

は、彼らが常にImageNetにpretrainedされているネットワークを使用することを教えてくれます、それは、ませ仕事はあなたが最初からそれを訓練する場合、それはpretrainedネットワークから微調整される必要があります。ランダムウェイトから訓練した場合の最適化問題は収束しません。

出典

2017-03-04 12:41:23

あなたのコメントをありがとう。この[リンク]（http://cs231n.github.io/transfer-learning/）をポイント番号「4」で参照すると、「新しいデータセットは元のデータセットと大きく異なります。データセットは非常に大きいので、私たちはConvNetを最初から訓練する余裕があると期待するかもしれません」と、私のデータは事前に訓練されたモデルの元のデータセットとは非常に異なるので、何が起こるでしょうか？私は混乱していると思う。どうもありがとう。 –

どうすればいいですか？あなたの提案は何ですか？ありがとうございます –

@ S.EB最も簡単なのは、ネットワークをデータセットにプリトレインすることです。画像分類のためにImageNetと言い、次にアーキテクチャの一部を変更して、それを細分化します。これを行うことができない場合は、独自のネットワークアーキテクチャを使用せず、VGG/ResNetのような事前に準備されたネットワークを使用してください。 –

私はこのセマンティックセグメンテーションに間違っていますか？

答えて

関連する問題