2017-08-06 16 views
0

私は、ロジットクラス予測子の代わりに画像を出力する畳み込みネットワークを使用する必要があるプロジェクトに着手しました。この目的のために、私はダウンロードしたCNNコードをアダプタに持ってきましたhttps://github.com/aymericdamien/TensorFlow-ExamplesTensorflow画像を返す畳み込みネットワーク(ログなし)

私の入力データはバイナリファイルから読み込まれた64x64画像です。バイナリファイルは、2つの64x64イメージのレコードから順に構成されます。私はネットワークの第2のイメージと64x64出力の違いであるコスト関数を最小限にする必要があります。

これは私が入力されたデータを読み出すために書いたモジュールです:

import tensorflow as tf 

# various initialization variables 
BATCH_SIZE = 128 
N_FEATURES = 9 

# This function accepts a tensor of size [batch_size, 2 ,record_size] 
# and segments in into two tensors of size [batch_size, record] along the second dimension 
# IMPORTANT: to be executed within an active session 
def segment_batch(batch_p, batch_size, n_input): 
    batch_xs = tf.slice(batch_p, [0,0,0], [batch_size,1,n_input]) # optical data tensor 
    batch_ys = tf.slice(batch_p, [0,1,0], [batch_size,1,n_input])   # GT data tensor 
    optical = tf.reshape([batch_xs], [batch_size, n_input]) 
    gt = tf.reshape([batch_ys], [batch_size, n_input]) 

    return [optical, gt] 



def batch_generator(filenames, record_size, batch_size): 
    """ filenames is the list of files you want to read from. 
    record_bytes: The size of a record in bytes 
    batch_size: The size a data batch (examples/batch) 
    """ 

    filename_queue = tf.train.string_input_producer(filenames) 
    reader = tf.FixedLengthRecordReader(record_bytes=2*record_size) #  record size is double the value given (optical + ground truth images) 
    _, value = reader.read(filename_queue) 


    # read in the data (UINT8) 
    content = tf.decode_raw(value, out_type=tf.uint8) 



    # The bytes read represent the image, which we reshape 
    # from [depth * height * width] to [depth, height, width]. 
    # read optical data slice 
    depth_major = tf.reshape(
    tf.strided_slice(content, [0], 
        [record_size]), 
    [1, 64, 64]) 

    # read GT (ground truth) data slice 
    depth_major1 = tf.reshape(
    tf.strided_slice(content, [record_size], 
        [2*record_size]), 
    [1, 64, 64]) 

    # Optical data 
    # Convert from [depth, height, width] to [height, width, depth]. 
    uint8image = tf.transpose(depth_major, [1, 2, 0]) 
    uint8image = tf.reshape(uint8image, [record_size]) # reshape into a single-dimensional vector 
    uint8image = tf.cast(uint8image, tf.float32) # cast into a float32 
    uint8image = uint8image/255 # normalize 

    # Ground Truth data 
    # Convert from [depth, height, width] to [height, width, depth]. 
    gt_image = tf.transpose(depth_major1, [1, 2, 0]) 
    gt_image = tf.reshape(gt_image, [record_size]) # reshape into a single-dimensional vector 
    gt_image = tf.cast(gt_image, tf.float32) # cast into a float32 
    gt_image = gt_image/255 # normalize 

    # stack them into a single features tensor 
    features = tf.stack([uint8image, gt_image]) 

    # minimum number elements in the queue after a dequeue, used to ensure 
    # that the samples are sufficiently mixed 
    # I think 10 times the BATCH_SIZE is sufficient 
    min_after_dequeue = 10 * batch_size 

    # the maximum number of elements in the queue 
    capacity = 20 * batch_size 

    # shuffle the data to generate BATCH_SIZE sample pairs 
    data_batch = tf.train.shuffle_batch([features], batch_size=batch_size, 
            capacity=capacity, min_after_dequeue=min_after_dequeue) 

    return data_batch 

これは私の実装の主なコードです:の形状を微調整をたくさんした後

from __future__ import print_function 

# Various initialization variables 
DATA_PATH_OPTICAL_TRAIN = 'data/building_ground_truth_for_training.bin' 
DATA_PATH_EVAL = 'data/building_ground_truth_for_eval.bin' 

import tensorflow as tf 
import numpy as np 
import matplotlib.pyplot as plt 
import time 

# custom imports 
import data_reader2 


# Parameters 
learning_rate = 0.001 
training_iters = 200000 
batch_size = 128 
epochs = 10 
display_step = 10 
rows = 64 
cols = 64 

# Network Parameters 
n_input = 4096 # optical image data (img shape: 64*64) 
n_classes = 4096 # output is an image of same resolution as initial image 
dropout = 0.75 # Dropout, probability to keep units 

# input data parameters 
record_size = 64**2 
total_bytes_of_optical_binary_file = 893329408 # total size of binary file containing training data ([64z64 optical] [64x64 GT]) 

# create the data batches (queue) 
# Accepts two parameters. The tensor containing the binary files and the size of a record 
data_batch = data_reader2.batch_generator([DATA_PATH_OPTICAL_TRAIN],record_size, batch_size) # train set 
data_batch_eval = data_reader2.batch_generator([DATA_PATH_EVAL],record_size, batch_size) # train set 

############################################################## 
######################### FUNCTIONS ########################## 
############################################################## 

# extract optical array from list 
# A helper function. Data returned from segment_batch is a list which contains two arrays. 
# The first array contains the optical data while the second contains the ground truth data 
def extract_optical_from_list(full_batch): 
    optical = full_batch[0] # extract array from list 
    return optical 

# extract ground truth array from list 
# A helper function. Data returned from segment_batch is a list which contains two arrays. 
# The first array contains the optical data while the second contains the ground truth data 
def extract_gt_from_list(full_batch): 
    gt = full_batch[1] # extract array from list 
    return gt 

# This function accepts a tensor of size [batch_size, 2 ,record_size] 
# and segments in into two tensors of size [batch_size, record] along the second dimension 
# IMPORTANT: to be executed within an active session 
def segment_batch(batch_p): 
    batch_xs = tf.slice(batch_p, [0,0,0], [batch_size,1,n_input]) # optical data tensor 
    batch_ys = tf.slice(batch_p, [0,1,0], [batch_size,1,n_input])   # GT data tensor 
    optical = tf.reshape([batch_xs], [batch_size, n_input]) 
    gt = tf.reshape([batch_ys], [batch_size, n_input]) 

    return [optical, gt] 

# Create some wrappers for simplicity 
def conv2d(x, W, b, strides=1): 
# Conv2D wrapper, with bias and relu activation 
    x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME') 
    x = tf.nn.bias_add(x, b) 
    return tf.nn.relu(x) 


def maxpool2d(x, k=2): 
    # MaxPool2D wrapper 
    return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1], 
         padding='SAME') 


# Create model 
def conv_net(x, weights, biases, dropout): 
    # Reshape input picture into 64x64 subimages [rows, rows, cols, channels] 
    x1 = tf.reshape(x, shape=[-1, rows, cols, 1]) # this is the 4-dimensional that tf.conv2D expects as Input 

    # Convolution Layer 
    conv1 = conv2d(x1, weights['wc1'], biases['bc1']) 
    # Max Pooling (down-sampling) 
    conv1 = maxpool2d(conv1, k=2) 

    # Convolution Layer 
    conv2 = conv2d(conv1, weights['wc2'], biases['bc2']) 
    # Max Pooling (down-sampling) 
    conv2 = maxpool2d(conv2, k=2) 

    # Fully connected layer 
    # Reshape conv2 output to fit fully connected layer input 
    fc1 = tf.reshape(conv2, [-1, weights['wd1'].get_shape().as_list()[0]]) 
    fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1']) 
    fc1 = tf.nn.relu(fc1) 
    # Apply Dropout 
    #fc1 = tf.nn.dropout(fc1, dropout) 

    # Output image (edge), prediction 
    out = tf.add(tf.matmul(fc1, weights['out']), biases['out']) 

    # Add print operation 
    out = tf.Print(out, [out], message="This is out: ") 

    return [out, x] 

# Store layers weight & bias 
weights = { 
    # 5x5 conv, 1 input, 32 outputs 
    'wc1': tf.Variable(tf.random_normal([5, 5, 1, 32])), 
    # 5x5 conv, 32 inputs, 64 outputs 
    'wc2': tf.Variable(tf.random_normal([5, 5, 32, 64])), 
    # fully connected, 7*7*64 inputs, 1024 outputs 
    'wd1': tf.Variable(tf.random_normal([16*16*64, 1024])), 
    # 1024 inputs, 10 outputs (class prediction) 
    'out': tf.Variable(tf.random_normal([1024, n_classes])) 
} 

biases = { 
    'bc1': tf.Variable(tf.random_normal([32])), 
    'bc2': tf.Variable(tf.random_normal([64])), 
    'bd1': tf.Variable(tf.random_normal([1024])), 
    'out': tf.Variable(tf.random_normal([n_classes])) 
} 


#################################################################### 
##################### PLACEHOLDERS ################################# 
#################################################################### 
# tf Graph input (only pictures) 
X = tf.placeholder_with_default(extract_optical_from_list(segment_batch(data_batch)), [batch_size, n_input]) 
#################################################################### 
##################### END OF PLACEHOLDERS ########################## 
#################################################################### 

# tf Graph input 
keep_prob = tf.Variable(dropout) #dropout (keep probability) 

# Construct model 
pred = conv_net(extract_optical_from_list(X), weights, biases, keep_prob) # x[0] is the optical data 
y_true = extract_gt_from_list(extract_gt_from_list(X)) # y_true is the ground truth data 

# Define loss and optimizer 
cost = tf.reduce_mean(tf.pow(y_true - pred[0], 2)) 
optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(cost) 


# Initializing the variables 
init = tf.global_variables_initializer() 

# Launch the graph 
with tf.Session() as sess: 
    sess.run(init) 
    step = 1 
    # Keep training until reach max iterations 
    while step * batch_size < training_iters: 
    print("Optimizing") 
    sess.run(optimizer) 
    print("Iter " + str(step*batch_size)) 
    step += 1 
print("Optimization Finished!") 

テンソル私は構文エラーを修正することができました。残念ながら、グラフの最適化部分の実行を開始したばかりです。私はこれをデバッグする方法がないので(Tensorflowデバッガの使用に関する非常に不十分な情報を見つけました)、私は間違っていたものを失ってしまいました! Tensorflowでより多くの経験を積んだ人がこのコードの何が間違っているのかを指摘できれば、私は多くの助けになります。

おかげで、事前

+0

"それはちょうどハングする"とはどういう意味ですか?何も起こりません?あなたはどれくらい待ったのですか?キューは最初に初期化され、_capacity_イメージで埋められる必要があります。 「デバッグ」のために、これが本当にエラーであるかどうかを確認するために、容量をより少ない数に減らそうとすることができます。 – aseipel

答えて

0

であなたがキューから最適化するためのデータを取得するためにキューランナーを起動する必要があります。

.... 
coord = tf.train.Coordinator() 
with tf.Session() as sess: 
    sess.run(init) 
    tf.train.start_queue_runners(sess=sess, coord=coord) 
    .... 
# also use tf.nn.sparse_softmax_cross_entropy_with_logits for cost 
+0

おかげさまでそれは私の馬鹿だった! – divined

+0

素晴らしい!あなたはその答えを受け入れるかもしれません。 –

関連する問題