2016-09-19 5 views
0

私はCNN + LSTM + CTCモデルをtensorflowでbuildしたいのですが、トレーニング中にLOSSがNaN値になってしまいます。どうしたらいいでしょうか?入力を特別に扱う必要がありますか?一方、LOSS値はNaNのまま変化しません。(以前theanoで同じモデルを書いたときも、同じ問題に直面しました。)何か助けていただけるとありがたいです!CTCトレーニングでNaN値を避けるには?

#incoding:utf-8 
import tensorflow as tf 
import numpy as np 
import cv2 
import Get_Data 
import random 
import time 

#load data
# Image/label pairs come from the project-local Get_Data module and are
# cached to .npy files for later reuse.  All paths are machine-specific.
model_data_path = "checkpoints.data"
images_path = "/home/liuyi/test/images"
#images_path = "/home/night/test/images"
ans_name = "answer"
# NOTE(review): Get_Data.get_data's return types are not visible here --
# presumably a numpy image array and a sequence of label arrays; confirm
# against the Get_Data module before relying on shapes below.
images_data, ans_data = Get_Data.get_data(images_path, ans_name)
np.save("/home/liuyi/test/tf_images.npy", images_data)
np.save("/home/liuyi/test/tf_labels.npy", ans_data)
print images_data.shape
print ans_data
#bulid model 
#----define---- 
def conv2d(x, w, b, strides=1):
    """SAME-padded 2-D convolution followed by bias add and ReLU."""
    stride_spec = (1, strides, strides, 1)
    conv = tf.nn.conv2d(x, w, stride_spec, "SAME")
    return tf.nn.relu(tf.nn.bias_add(conv, b))

def dropout(x, rate):
    # Thin wrapper over tf.nn.dropout.
    # NOTE(review): in the TF 0.x API this file targets, the second
    # positional argument is keep_prob (probability of KEEPING a unit),
    # not a drop rate -- callers should pass e.g. 0.75 to drop 25%.
    # Confirm against the installed TF version.
    return tf.nn.dropout(x, rate)

def maxpool2d(x, poolsize=(2,2)):
    """Max-pool with SAME padding; window and stride both equal poolsize."""
    window = (1, poolsize[0], poolsize[1], 1)
    return tf.nn.max_pool(x, ksize=window, strides=window, padding="SAME")

def flatten(x):
    # Collapse every dimension after the batch axis into one:
    # (batch, h, w, c) -> (batch, h*w*c).
    return tf.contrib.layers.flatten(x)

def full_con(x, w, b):
    """Fully connected layer: x @ w + b, with no activation applied."""
    projected = tf.matmul(x, w)
    return tf.nn.bias_add(projected, b)

def LSTM(x, n_input, hidden_units, out_dim, forget_bias = 1.0, layer_num = 1):
    """Run x through a stacked LSTM and return time-major per-step logits.

    x is reshaped to (batch, time, n_input), fed through layer_num LSTM
    layers whose output is projected to out_dim units (num_proj), and the
    result is transposed to (time, batch, out_dim) -- the layout the CTC
    ops in this file consume.
    """
    def _make_cell():
        # BUG FIX: the original built ONE cell and stacked it as
        # [lstm] * layer_num, so every layer shared the same cell object --
        # a well-known source of variable-sharing errors and incorrect
        # stacked-RNN behaviour.  Each layer now gets its own cell.
        return tf.nn.rnn_cell.LSTMCell(hidden_units, forget_bias=forget_bias,
                                       state_is_tuple=True, num_proj=out_dim)
    lstms = tf.nn.rnn_cell.MultiRNNCell([_make_cell() for _ in range(layer_num)],
                                        state_is_tuple=True)
    x = tf.reshape(x, (int(x.get_shape()[0]), int(x.get_shape()[1]), n_input))
    out, _ = tf.nn.dynamic_rnn(lstms, x, dtype="float")
    # (batch, time, out_dim) -> (time, batch, out_dim) for ctc_loss/decoder.
    return tf.transpose(out, [1, 0, 2])
#----define weight----
# BUG FIX / NaN cause: the original initialised every variable with
# tf.random_normal's default stddev of 1.0.  Through the 3200-input dense
# layer that yields activations of magnitude ~sqrt(3200), saturating the
# CTC softmax and producing NaN loss -- the exact symptom this model shows.
# A small stddev keeps the early logits in a numerically safe range.

def _init(shape, stddev=0.1):
    # Small random-normal initialiser shared by all weights and biases.
    return tf.Variable(tf.random_normal(shape, stddev=stddev))

weights = {
    'wc1': _init([5, 5, 1, 8]),
    'wc2': _init([5, 5, 8, 16]),
    'wc3': _init([5, 5, 16, 16]),
    'wc4': _init([5, 5, 16, 16]),
    'wc5': _init([5, 5, 16, 16]),
    'wc6': _init([5, 5, 16, 16]),
    'wf1': _init([3200, 1000]),
    'wf2': _init([1000, 50]),
}
biases = {
    'bc1': _init([8]),
    'bc2': _init([16]),
    'bc3': _init([16]),
    'bc4': _init([16]),
    'bc5': _init([16]),
    'bc6': _init([16]),
    'bf1': _init([1000]),
    'bf2': _init([50]),
}
#----define model----
batch_size = 200
# 26 letters + 1 + 1.  NOTE(review): presumably one of the two extras is
# the CTC blank label; confirm this matches the label encoding in Get_Data.
num_classes = 26+1+1
max_len = 21
# Every sample gets the same fixed sequence length.
# NOTE(review): this should reflect the real time dimension that reaches
# ctc_loss (after conv/pool downsampling), not just the max label length --
# a mismatch here is a common cause of degenerate or NaN CTC loss. TODO confirm.
sequence_length = np.full((batch_size),max_len,dtype=np.int32)
x = tf.placeholder("float", [batch_size, 200, 60, 1], "images")
# Components of the label SparseTensor: indices, values, dense shape.
y_i = tf.placeholder(tf.int64, [None, 2], "y_i")
y_v = tf.placeholder(tf.int32, [None,], "y_v")
y_shape = tf.placeholder(tf.int64, [2,], "y_shape")
#--------CNN Layer--------
conv2do1 = conv2d(x, weights['wc1'], biases['bc1'])
conv2do2 = conv2d(conv2do1, weights['wc2'], biases['bc2'])
conv2do2 = maxpool2d(conv2do2)
#--------CNN Layer--------
conv2do3 = conv2d(conv2do2, weights['wc3'], biases['bc3'])
conv2do4 = conv2d(conv2do3, weights['wc4'], biases['bc4'])
conv2do4 = maxpool2d(conv2do4)
#--------CNN Layer--------
conv2do5 = conv2d(conv2do4, weights['wc5'], biases['bc5'])
conv2do6 = conv2d(conv2do5, weights['wc6'], biases['bc6'])
conv2do6 = maxpool2d(conv2do6)
#--------Flatten Layer--------
conv2do6 = flatten(conv2do6)
#--------Full Connection--------
fc1 = full_con(conv2do6, weights['wf1'], biases['bf1'])
fc2 = full_con(fc1, weights['wf2'], biases['bf2'])
#--------LSTM Layer--------
# fc2 (batch, 50) is reshaped inside LSTM to (batch, 50, 1) and run through
# a 3-layer LSTM projecting each step to num_classes logits.
lstms = LSTM(fc2, n_input=1, hidden_units=32, out_dim=num_classes, layer_num=3)
#--------CTC Layer--------
# NOTE(review): this TF 0.x ctc_loss signature is (inputs, labels,
# sequence_length) with time-major inputs; LSTM() already transposes to
# (time, batch, classes).  Confirm against the installed TF version.
ctc_o = tf.nn.ctc_loss(lstms, tf.SparseTensor(y_i, y_v, y_shape), sequence_length)
#----------------
loss = tf.reduce_mean(ctc_o)
# Greedy (best-path) decoding; [0][0] extracts the decoded SparseTensor.
ctc_p = tf.nn.ctc_greedy_decoder(lstms, sequence_length)[0][0]
o = ctc_p
train = tf.train.AdagradOptimizer(learning_rate=0.01).minimize(loss)
saver = tf.train.Saver(tf.all_variables())
#run model 
epoch = 200 
images_sum = 10000 
train_rate = 0.8 
slice_pos = 9800 

train_images = images_data[:slice_pos] 
train_labels = ans_data[:slice_pos] 

test_images = images_data[slice_pos:] 
test_labels = ans_data[slice_pos:] 

random_list = np.arange(slice_pos) 

batch_sum = int(slice_pos/batch_size) 
test_batch = int(images_sum-slice_pos)/batch_size 

init = tf.initialize_all_variables() 
sess = tf.InteractiveSession() 
sess.run(init) 
file_name = "out" 
for e in range(epoch): 
    random.shuffle(random_list) 
    for i in range(batch_sum): 
     begin_time = time.clock() 
     train_x = [train_images[m] for m in random_list[i*batch_size:(i+1)*batch_size]] 
     train_y = [train_labels[m] for m in random_list[i*batch_size:(i+1)*batch_size]] 
     train_yi, train_yv, train_ys = Get_Data.SparseDataFrom(train_y) 
     batch_loss = sess.run(loss, feed_dict={x: train_x, y_i: train_yi, y_v: train_yv, y_shape: train_ys}) 
     sess.run(train, feed_dict={x: train_x, y_i: train_yi, y_v: train_yv, y_shape: train_ys}) 
     end_time = time.clock() 
     print "epoch{0}/{1}: batch{2}/{3} loss={4} time={5}s".format(e+1, epoch, (i+1)*batch_size, slice_pos, batch_loss,(end_time-begin_time)*(batch_sum-i)) 
    right_num = 0 
    loss_sum = 0 
    begin_time = time.clock() 
    for i in range(test_batch): 
     test_x = [test_images[i * batch_size:(i + 1) * batch_size]] 
     test_y = [test_labels[i * batch_size:(i + 1) * batch_size]] 
     test_yi, test_yv, test_ys = Get_Data.SparseDataFrom(test_y) 
     tmp_loss = sess.run(loss, feed_dict={x: test_x, y_i: test_yi, y_v: test_yv, y_shape: test_ys}) 
     test_ans = sess.run(o, feed_dict={x: test_x, y_i: test_yi, y_v: test_yv, y_shape: test_ys}) 
     test_ans = Get_Data.SparseDatatoDense(test_ans) 
     tmp_right_num = Get_Data.data_difference(test_ans, test_y) 
     loss_sum += tmp_loss 
     right_num += tmp_right_num 
    end_time = time.clock() 
    print "epoch{0}/{1}: loss={2} right_num = {3} time={4}s".format(e + 1, epoch, loss_sum, right_num, end_time - begin_time) 
    saver.save(sess, model_data_path) 
sess.close() 
+0

通常、NaNはゼロの対数を取る、ゼロに非常に近い数値で除算する、などの操作によって生成されます。より詳細な回答を得るために、コードを投稿することを検討してください。 –

答えて

0
この質問はかなり古いので、あなたはすでに解決しているかもしれませんが:

...

私もwarp-CTCのTorchバインディングを使っていて、BLANKラベル用の余分な出力次元を追加するまではNaNが発生していました。

LSTMにout_dim=num_classes+1をお試しください。

私は役立つことを願っています。

+0

また、CTC損失関数に入力するシーケンス長が、畳み込みによって縮小された後の実際の出力サイズを反映していることを確認してください。ミニバッチテンソルのパディングをマスクすることも重要です。 –

関連する問題