0

以下のコードでは、出力を計算するために10個の畳み込み(conv)層とLSTMを使用しています。conv層を追加すると損失が増え、精度が低下します(TensorFlow)。

conv層を1つだけ使ってLSTMと組み合わせた場合は、正常に動作します。しかし、conv層を増やすと(以下のコードでは10層)、損失が大きくなり、精度が低下し始めます。勾配消失を防ぐために、各conv層の後にバッチ正規化(batch norm)を適用しています。ネットワークが過学習できるかどうかを確認するために、5〜10個のサンプルだけで学習させてみましたが、それでも非常に大きな損失になります。conv層を減らすと、問題なく動作します。どこにバグがあるのでしょうか?

EDIT: 試してみたい場合は、再現可能なコードがここにあります - link(原因は分かりました。ニューロン数や層数を56のように場当たり的な値で増やしたときに、この問題が発生します。)

# Graph inputs.
# X: float sequences, time_steps x embedding values per example.
X = tf.placeholder(dtype=tf.float32, shape=[None, time_steps, embedding])
# Y: one integer class id per example.
Y = tf.placeholder(dtype=tf.int32, shape=[None])
# A: boolean flag fed to batch_norm as is_training.
A = tf.placeholder(dtype=tf.bool)
# B: float fed to dropout as keep_prob.
B = tf.placeholder(dtype=tf.float32)

# Append a trailing axis of size 1 so conv2d receives a 4-D input:
# [batch, time_steps, embedding, 1].
x = tf.expand_dims(X, 3)

# Conv layer 1: a 1 x embedding kernel with VALID padding collapses the
# embedding axis, giving 64 channels per time step. ELU is applied first,
# then batch normalization.
filter_shape = [1, embedding, 1, 64]
conv_weights = tf.get_variable(
    name="conv_weights1",
    shape=filter_shape,
    dtype=tf.float32,
    initializer=tf.truncated_normal_initializer(mean=0.0, stddev=1.0),
)
conv_biases = tf.Variable(tf.constant(0.1, shape=[64]))
conv = tf.nn.conv2d(
    x, conv_weights, strides=[1, 1, 1, 1], padding="VALID"
)
normalize = tf.nn.elu(conv + conv_biases)
tf_normalize = tf.contrib.layers.batch_norm(
    inputs=normalize, is_training=A
)
outputs_fed_lstm = tf_normalize

# Conv layer 2: 1x1 convolution, 64 -> 64 channels, then ELU and batch norm.
filter_shape2 = [1, 1, 64, 64]
conv_weights2 = tf.get_variable(
    name="conv_weights2",
    shape=filter_shape2,
    dtype=tf.float32,
    initializer=tf.truncated_normal_initializer(mean=0.0, stddev=1.0),
)
conv_biases2 = tf.Variable(tf.constant(0.1, shape=[64]))
conv2 = tf.nn.conv2d(
    outputs_fed_lstm, conv_weights2, strides=[1, 1, 1, 1], padding="VALID"
)
normalize2 = tf.nn.elu(conv2 + conv_biases2)
tf_normalize2 = tf.contrib.layers.batch_norm(
    inputs=normalize2, is_training=A
)
outputs_fed_lstm2 = tf_normalize2

# Conv layer 3: 1x1 convolution, 64 -> 64 channels, then ELU and batch norm.
filter_shape3 = [1, 1, 64, 64]
conv_weights3 = tf.get_variable(
    name="conv_weights3",
    shape=filter_shape3,
    dtype=tf.float32,
    initializer=tf.truncated_normal_initializer(mean=0.0, stddev=1.0),
)
conv_biases3 = tf.Variable(tf.constant(0.1, shape=[64]))
conv3 = tf.nn.conv2d(
    outputs_fed_lstm2, conv_weights3, strides=[1, 1, 1, 1], padding="VALID"
)
normalize3 = tf.nn.elu(conv3 + conv_biases3)
tf_normalize3 = tf.contrib.layers.batch_norm(
    inputs=normalize3, is_training=A
)
outputs_fed_lstm3 = tf_normalize3

# Conv layer 4: 1x1 convolution widening 64 -> 128 channels, then ELU and
# batch norm.
filter_shape4 = [1, 1, 64, 128]
conv_weights4 = tf.get_variable(
    name="conv_weights4",
    shape=filter_shape4,
    dtype=tf.float32,
    initializer=tf.truncated_normal_initializer(mean=0.0, stddev=1.0),
)
conv_biases4 = tf.Variable(tf.constant(0.1, shape=[128]))
conv4 = tf.nn.conv2d(
    outputs_fed_lstm3, conv_weights4, strides=[1, 1, 1, 1], padding="VALID"
)
normalize4 = tf.nn.elu(conv4 + conv_biases4)
tf_normalize4 = tf.contrib.layers.batch_norm(
    inputs=normalize4, is_training=A
)
outputs_fed_lstm4 = tf_normalize4

# Conv layer 5: 1x1 convolution, 128 -> 128 channels, then ELU and batch norm.
filter_shape5 = [1, 1, 128, 128]
conv_weights5 = tf.get_variable(
    name="conv_weights5",
    shape=filter_shape5,
    dtype=tf.float32,
    initializer=tf.truncated_normal_initializer(mean=0.0, stddev=1.0),
)
conv_biases5 = tf.Variable(tf.constant(0.1, shape=[128]))
conv5 = tf.nn.conv2d(
    outputs_fed_lstm4, conv_weights5, strides=[1, 1, 1, 1], padding="VALID"
)
normalize5 = tf.nn.elu(conv5 + conv_biases5)
tf_normalize5 = tf.contrib.layers.batch_norm(
    inputs=normalize5, is_training=A
)
outputs_fed_lstm5 = tf_normalize5

# Conv layer 6: 1x1 convolution, 128 -> 128 channels, then ELU and batch norm.
filter_shape6 = [1, 1, 128, 128]
conv_weights6 = tf.get_variable(
    name="conv_weights6",
    shape=filter_shape6,
    dtype=tf.float32,
    initializer=tf.truncated_normal_initializer(mean=0.0, stddev=1.0),
)
conv_biases6 = tf.Variable(tf.constant(0.1, shape=[128]))
conv6 = tf.nn.conv2d(
    outputs_fed_lstm5, conv_weights6, strides=[1, 1, 1, 1], padding="VALID"
)
normalize6 = tf.nn.elu(conv6 + conv_biases6)
tf_normalize6 = tf.contrib.layers.batch_norm(
    inputs=normalize6, is_training=A
)
outputs_fed_lstm6 = tf_normalize6

# Conv layer 7: 1x1 convolution widening 128 -> 256 channels, then ELU and
# batch norm.
filter_shape7 = [1, 1, 128, 256]
conv_weights7 = tf.get_variable(
    name="conv_weights7",
    shape=filter_shape7,
    dtype=tf.float32,
    initializer=tf.truncated_normal_initializer(mean=0.0, stddev=1.0),
)
conv_biases7 = tf.Variable(tf.constant(0.1, shape=[256]))
conv7 = tf.nn.conv2d(
    outputs_fed_lstm6, conv_weights7, strides=[1, 1, 1, 1], padding="VALID"
)
normalize7 = tf.nn.elu(conv7 + conv_biases7)
tf_normalize7 = tf.contrib.layers.batch_norm(
    inputs=normalize7, is_training=A
)
outputs_fed_lstm7 = tf_normalize7

# Conv layer 8: 1x1 convolution, 256 -> 256 channels, then ELU and batch norm.
filter_shape8 = [1, 1, 256, 256]
conv_weights8 = tf.get_variable(
    name="conv_weights8",
    shape=filter_shape8,
    dtype=tf.float32,
    initializer=tf.truncated_normal_initializer(mean=0.0, stddev=1.0),
)
conv_biases8 = tf.Variable(tf.constant(0.1, shape=[256]))
conv8 = tf.nn.conv2d(
    outputs_fed_lstm7, conv_weights8, strides=[1, 1, 1, 1], padding="VALID"
)
normalize8 = tf.nn.elu(conv8 + conv_biases8)
tf_normalize8 = tf.contrib.layers.batch_norm(
    inputs=normalize8, is_training=A
)
outputs_fed_lstm8 = tf_normalize8

# Conv layer 9: 1x1 convolution, 256 -> 256 channels, then ELU and batch norm.
filter_shape9 = [1, 1, 256, 256]
conv_weights9 = tf.get_variable(
    name="conv_weights9",
    shape=filter_shape9,
    dtype=tf.float32,
    initializer=tf.truncated_normal_initializer(mean=0.0, stddev=1.0),
)
conv_biases9 = tf.Variable(tf.constant(0.1, shape=[256]))
conv9 = tf.nn.conv2d(
    outputs_fed_lstm8, conv_weights9, strides=[1, 1, 1, 1], padding="VALID"
)
normalize9 = tf.nn.elu(conv9 + conv_biases9)
tf_normalize9 = tf.contrib.layers.batch_norm(
    inputs=normalize9, is_training=A
)
outputs_fed_lstm9 = tf_normalize9

# Conv layer 10 (its names use the suffix 0): 1x1 convolution widening
# 256 -> 512 channels, then ELU and batch norm.
filter_shape0 = [1, 1, 256, 512]
conv_weights0 = tf.get_variable(
    name="conv_weights0",
    shape=filter_shape0,
    dtype=tf.float32,
    initializer=tf.truncated_normal_initializer(mean=0.0, stddev=1.0),
)
conv_biases0 = tf.Variable(tf.constant(0.1, shape=[512]))
conv0 = tf.nn.conv2d(
    outputs_fed_lstm9, conv_weights0, strides=[1, 1, 1, 1], padding="VALID"
)
normalize0 = tf.nn.elu(conv0 + conv_biases0)
tf_normalize0 = tf.contrib.layers.batch_norm(
    inputs=normalize0, is_training=A
)
outputs_fed_lstm0 = tf_normalize0


# Dropout on the output of the last conv layer; B is the keep probability.
outputs_fed_lstm10 = tf.nn.dropout(x=outputs_fed_lstm0, keep_prob=B)

# Turn the conv output into the list-of-tensors form consumed by tf.nn.rnn.
# Note that `x` is rebound here and no longer refers to the expanded input.
x = tf.squeeze(outputs_fed_lstm10, [2])  # drop axis 2 (size 1 after conv 1)
x = tf.transpose(x, [1, 0, 2])           # swap axes 0 and 1: time-major
x = tf.reshape(x, [-1, 512])             # flatten to rows of 512 features
x = tf.split(0, time_steps, x)           # time_steps pieces along axis 0

# Single LSTM cell with _units hidden units, unrolled over the input list x.
lstm = tf.nn.rnn_cell.LSTMCell(num_units=_units, state_is_tuple=True)

# multi_lstm = tf.nn.rnn_cell.MultiRNNCell([lstm] * lstm_layers, state_is_tuple = True)

outputs, state = tf.nn.rnn(lstm, x, dtype=tf.float32)

# Linear projection from the LSTM output to num_classes scores.
weights = tf.Variable(tf.random_normal([_units, num_classes]))
biases = tf.Variable(tf.random_normal([num_classes]))

# Only the last element of `outputs` is used for classification.
logits = tf.matmul(outputs[-1], weights) + biases

# Per-example sparse softmax cross-entropy between logits and the integer
# labels Y, averaged into a scalar loss.
c_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits,Y)
loss = tf.reduce_mean(c_loss)


# Step counter incremented by the optimizer; drives the learning-rate decay.
global_step = tf.Variable(0, name="global_step", trainable=False)
# Start at 0.01 and multiply by 0.96 every 300 steps (staircase = discrete steps).
decayed_learning_rate = tf.train.exponential_decay(learning_rate = 0.01,global_step = global_step,decay_steps = 300, decay_rate = 0.96, staircase = True)
optimizer= tf.train.AdamOptimizer(learning_rate = decayed_learning_rate)
#grads_and_vars = optimizer.compute_gradients(loss,[conv_weights0])

# tf.contrib.layers.batch_norm registers its moving mean/variance updates in
# the UPDATE_OPS collection by default instead of running them in place.
# Unless the train op depends on them, the moving statistics are never
# updated, and feeding is_training=False normalizes with the initial values.
# Making the train op depend on the collection runs the updates every step.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    minimize_loss = optimizer.minimize(loss, global_step=global_step)

# A prediction counts as correct when the true label Y is the top-1 logit.
correct_predict = tf.nn.in_top_k(logits, Y, k=1)
# Fraction of correct predictions in the batch.
accuracy = tf.reduce_mean(tf.cast(correct_predict, tf.float32))
+0

他にもこの問題を抱えている人がいます - http://stackoverflow.com/questions/35268167/low-accuracy-with-change-to-tensorflow-cifar10-example – shader

答えて

1

ニューロン数を56、86、次に496のように場当たり的な値で増やすと、層をいくつ追加しても、損失が膨大で精度が非常に低い結果になり、この種の問題が発生しやすくなります。そのため、この問題に対する解決策は、64、128、256、512のような特定のパターンに従うことです。

関連する問題