2017-10-23 12 views
0

私は最近、MATLABからTensorflowに移行することを決めました。私はTensorflowでALexNetモデルを作成し始めました。自分のデータでAlexNetを最初から訓練したい。 (私はMATLABを使ってMATLABを使ってこれを成功させました)。しかし、テンソルフローの私のモデルは収束しません。損失と正確さは変わりません。 データをtfrecordsファイルに保存して読み込み、データが正しく読み込まれていることを確認できます。しかし、私はモデルdeosntが訓練するように見える理由を得ることができません。 Ubuntu 16.04でPython 2.7でTensorflow 1.2.0を使用しています。ここで AlexNetがテンソルフローに収束しない

は私のコードです:あなたはいけない

y = tf.nn.softmax(fc8) 
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_)) 

:「val.tfrecordsは」私の訓練日を含むtfrecordsファイルである私はあなたのコード内の少なくとも1つのエラーを参照してください

import numpy as np 
import matplotlib.pyplot as plt 
import sys 
import cv2 
from random import shuffle 
import random as rand 
import glob 
import tensorflow as tf 
import os 
import scipy.misc 
import math 

def _int64_feature(value): 
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) 
def _bytes_feature(value): 
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) 

# Initialize the weights with random numbers 
def W_init (w_height,w_width,num_channels,num_filters,method = 'normal', W_name = 'w'): 
    if method is 'normal': std =1 
    if method is 'xavier': std = std = np.sqrt(2./(w_width*w_height*num_channels)) 
    return(tf.Variable(tf.truncated_normal([w_height,w_width,num_channels,num_filters], mean=0.0, stddev=std),name = W_name,trainable=True)) 



# Create wrappers for simplicity 

# Convolution layer 
def conv_layer(x,W,b,stride,pad): 
    x = tf.pad(x, [[0, 0], [pad, pad], [pad, pad], [0, 0]], "CONSTANT") 
    y = tf.nn.conv2d(x, W, strides=[1,stride,stride,1], padding='VALID') 
    y = tf.nn.bias_add(y,b) 
    return y 

# Pooling layer 
def pool_layer(x,k,stride,method): 
    if method is 'max': 
     y = tf.nn.max_pool(x, ksize = [1,k,k,1], strides = [1,stride,stride,1], padding='VALID') 
    if method is 'avg': 
     y = tf.nn.avg_pool(x, ksize = [1,k,k,1], strides = [1,stride,stride,1], padding='VALID') 
    return y 


# Create a model AlexNet 
def AlexNet(x,y_): 

    # input 
    x = tf.reshape(x,shape =[-1,224,224,3]) 

    # conv1 + relu1 
    conv1 = tf.nn.relu(conv_layer(x,W=W_init (11,11,3,96,method = 'xavier',W_name ='w1'), b=tf.Variable(tf.zeros([96]),name='b1',trainable=True),stride=4,pad= 3)) 
    # maxpool1 
    max1 = pool_layer(conv1,k=2,stride=2,method='max') 


    # conv2 + relu2 
    conv2 = tf.nn.relu(conv_layer(max1,W=W_init (5,5,96,256,method = 'xavier',W_name ='w2'), b=tf.Variable(tf.zeros([256]),name='b2',trainable=True),stride=1,pad= 2)) 
    # maxpool2 
    max2 = pool_layer(conv2,k=2,stride=2,method='max') 

    # conv3 + relu3 
    conv3 = tf.nn.relu(conv_layer(max2,W=W_init (3,3,256,384,method = 'xavier',W_name ='w3'), b=tf.Variable(tf.zeros([384]),name='b3',trainable=True),stride=1,pad= 1)) 

    # conv4 + relu4 
    conv4 = tf.nn.relu(conv_layer(conv3,W=W_init (3,3,384,384,method = 'xavier',W_name ='w4'), b=tf.Variable(tf.zeros([384]),name='b4',trainable=True),stride=1,pad= 1)) 

    # conv5 + relu5 
    conv5 = tf.nn.relu(conv_layer(conv4,W=W_init (3,3,384,256,method = 'xavier',W_name ='w5'), b=tf.Variable(tf.zeros([256]),name='b5',trainable=True),stride=1,pad= 1)) 
    # maxpool5 
    max5 = pool_layer(conv5,k=2,stride=2,method='max') 

    # flatten the convolution output to use in fc layer 
    max5_size = np.product([s.value for s in max5.get_shape()[1:]]) 
    max5_flat = tf.reshape(max5, [-1, max5_size ])  


    # fc6 + relu6 +drop6 
    fc6 = tf.nn.relu(tf.matmul(max5_flat,tf.Variable(tf.truncated_normal([max5_size,4096],mean=0.0, stddev=2./math.sqrt(max5_size)),name='w6',trainable=True))+ tf.Variable(tf.zeros([4096]),name='b6',trainable=True)) 
    drop6 = tf.nn.dropout(fc6, 0.5) 

    # fc7 + relu7 +drop7 
    fc7 = tf.nn.relu(tf.matmul(drop6, tf.Variable(tf.truncated_normal([4096,4096], mean=0.0, stddev=2./math.sqrt(4096)),name='w7',trainable=True))+ tf.Variable(tf.zeros([4096]),name='b7',trainable=True)) 
    drop7 = tf.nn.dropout(fc7, 0.5) 

    # fc8 
    fc8 = tf.matmul(drop7, tf.Variable(tf.truncated_normal([4096,23], mean=0.0, stddev=2./math.sqrt(23)),name='w8',trainable=True))+ tf.Variable(tf.zeros([23]),name='b8',trainable=True) 

    y = tf.nn.softmax(fc8) 
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_)) 

    # Evaluate model 
    correct_pred = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1)) 
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))  
    optimizer = tf.train.AdamOptimizer(0.01).minimize(cost) 


    return cost,accuracy 



train_filename = '/home/Documents/MyData/val.tfrecords' 

# Model parameters 
learning_rate = 0.01 
Nimages = 1087 
mean_image = np.load('mean_image_256.npy') # This is the 
n_input = 224*224*3 # img shape: 224*224*3 
n_classes = 23 
batch_size = 200 
Num_epochs = 1000 
display_step = 3*batch_size 

## Read the tfrecord file we've just created ############################################# 
# 1- Create a list of filenames: In this case there's only a single file 
train_data_path = train_filename 


# TF graph Inputs and Placeholders 
x_ = tf.placeholder(tf.float32, [None,224,224,3]) 
y_ = tf.placeholder(tf.float32, [None,n_classes]) 

# Define out loss and optimizer 

with tf.Session() as sess: 
    feature = {'train/image' : tf.FixedLenFeature([], tf.string), 
       'train/label' : tf.FixedLenFeature([], tf.int64), 
       'train/height': tf.FixedLenFeature([], tf.int64), 
       'train/width' : tf.FixedLenFeature([], tf.int64)} 

    # 2- Create a queue to hold filenames: To do so, we use tf.train.string_input_producer function which hold filenames in a FIFO queue. 
    # it gets the list of filnames. It also has some optional arguments including num_epochs which indicates the number of epoch you want to to load the data, 
    # and shuffle which indicates whether to suffle the filenames in the list or not. It is set to True by default. 

    train_filename_queue = tf.train.string_input_producer([train_data_path], num_epochs=None) 
    val_filename_queue = tf.train.string_input_producer([val_data_path], num_epochs=None) 

    # 3- Define a reader and read the next record 
    # For files of TFRecords we need to define a TFRecordReader with reader = tf.TFRecordReader(). 
    # Now, the reader returns the next record using: reader.read(filename_queue) 
    reader = tf.TFRecordReader() 

    _, train_serialized_example = reader.read(train_filename_queue) 
    _, val_serialized_example = reader.read(val_filename_queue) 

    # 4- Decode the record read by the reader 
    # A decoder is needed to decode the record read by the reader. 
    # In case of using TFRecords files the decoder should be tf.parse_single_example. it takes a serialized Example and a dictionary 
    # which maps feature keys to FixedLenFeature or VarLenFeature values 
    # and returns a dictionary which maps feature keys to Tensor values: features = tf.parse_single_example(serialized_example, features=feature) 
    train_features = tf.parse_single_example(train_serialized_example, features=feature) 
    val_features = tf.parse_single_example(val_serialized_example, features=feature) 

    # 5- Convert the image data from string back to the numbers 
    # tf.decode_raw(bytes, out_type) takes a Tensor of type string and convert it to typeout_type. 
    # However, for labels which have not been converted to string, we just need to cast them using tf.cast(x, dtype) 
    train_image = tf.decode_raw(train_features['train/image'], tf.float32) 

    # 6- Cast label data into int32 and Reshape image data into the original shape 
    train_label = tf.cast(train_features['train/label'], tf.int32) 
    train_label = tf.one_hot(train_label, n_classes) 
    train_height = tf.cast(train_features['train/height'], tf.int32) 
    train_width = tf.cast(train_features['train/width'], tf.int32) 
    train_image = tf.reshape(train_image, tf.stack([train_height, train_width, 3])) 



    # 7- Any preprocessing here ... 
    train_image = tf.image.central_crop(train_image, 1) 
    train_image = tf.image.resize_images(train_image, [256,256]) 
    train_image = tf.random_crop(train_image, [224, 224, 3]) 
    train_image = tf.image.random_flip_left_right(train_image) 





    # 8- Creates batches by randomly shuffling tensors 
    # Batching: Another queue is needed to create batches from the examples. You can create the batch queue using: 
    # tf.train.shuffle_batch([image, label], batch_size=10, capacity=30, num_threads=1, min_after_dequeue=10) 
    # where capacity is the maximum size of queue, min_after_dequeue is the minimum size of queue after dequeue, 
    # and num_threads is the number of threads enqueuing examples. 
    # Using more than one thread, it comes up with a faster reading. 
    # The first argument in a list of tensors which you want to create batches from. 
    train_images, train_labels = tf.train.shuffle_batch([train_image, train_label], batch_size=batch_size, capacity=3*batch_size, num_threads=1, min_after_dequeue=batch_size,allow_smaller_final_batch=True) 




    coord = tf.train.Coordinator() 
    threads = tf.train.start_queue_runners(coord=coord) 

    cost,accuracy = AlexNet(x_,y_) 

    # 9- Initialize all global and local variables 

    init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) 
    sess.run(init_op) 
    writer = tf.summary.FileWriter(logdir = '/tmp/tf/foo', graph=tf.get_default_graph()) 
    writer.flush() 

    # 10- Create a coordinator and run all QueueRunner objects 
    # Filing the example queue: Some functions of tf.train such as tf.train.shuffle_batch add tf.train. 
    # QueueRunner objects to your graph. Each of these objects hold a list of enqueue op for a queue 
    # to run in a thread. Therefore, to fill a queue you need to call tf.train.start_queue_runners 
    # which starts threades for all the queue runners in the graph. 
    # However, to manage these threads you need a tf.train.Coordinator to terminate the threads at the proper time. 



    # Create a saver for writing training checkpoints. 
    saver = tf.train.Saver() 


    train_acc = np.zeros(Num_epochs) 
    val_acc = np.zeros(Num_epochs) 

    for epoch in range(Num_epochs): 
     im_counter = 0 

     for iter in range(Nimages/batch_size): 
      im_counter+=batch_size 
      # Get a training image batch and subtract mean 
      t_img, t_lbl = sess.run([train_images, train_labels]) 
      t_img = (t_img - scipy.misc.imresize(mean_image, (224,224)))/255 
      #t_lbl = to_onehot(t_lbl,n_classes) 



      # Run session 

      # Calculate batch loss and accuracy 
      loss, acc = sess.run([cost, accuracy], feed_dict={x_: t_img, y_: t_lbl}) 

      # Display training results 
      if (im_counter%display_step)==0: 
       print "epoch " + str(epoch) + " Processed images: " + str(im_counter) + "/" +str(Nimages)+", Minibatch Loss= " + \ 
       "{:.6f}".format(loss) + ", Accuracy= " + \ 
       "{:.5f}".format(acc) 

     # After an epoch is trained, save model and run validation 
     saver.save(sess, 'AlexNet_saved_model.ckpt') 
     train_acc[epoch] = acc 

     im_counter = 0 


    print "Optimization Finished!" 
    plt.plot(train_acc) 
    plt.title('Training accuracy') 
    plt.show()    
    # Stop the threads 
    coord.request_stop() 

    # Wait for threads to stop 
    coord.join(threads) 
    sess.close() 
+1

便利なモデル動物園で提供されているAlexNetを使用する代わりに、なぜMATLABから変換していますか?それはあなたが持っている問題を修正するはずです。 – Prune

答えて

0

tf.nn.softmax_cross_entropy_with_logitsを呼び出す前にsoftmaxを適用してください。

WARNINGdocumentationは明らかにこれ述べ、それは効率のために内部logitssoftmax を行っているため、このオペアンプは、スケーリングされていないlogitsを期待しています。出力がのsoftmaxでこのオペレーションを呼び出さないでください。誤った結果が出る可能性があります。

+0

あなたの答えをありがとう。誤ってsoftmax_cross_entropy_with_logitsの前に実際にsoftmaxを適用しています。私はそれを修正した。しかし、それでも同じ振る舞いをしています。 –

関連する問題