私は、ロジットクラス予測子の代わりに画像を出力する畳み込みネットワークを使用する必要があるプロジェクトに着手しました。この目的のために、私はダウンロードしたCNNコードをアダプタに持ってきましたhttps://github.com/aymericdamien/TensorFlow-ExamplesTensorflow画像を返す畳み込みネットワーク(ログなし)
私の入力データはバイナリファイルから読み込まれた64x64画像です。バイナリファイルは、2つの64x64イメージのレコードから順に構成されます。私はネットワークの第2のイメージと64x64出力の違いであるコスト関数を最小限にする必要があります。
これは私が入力されたデータを読み出すために書いたモジュールです:
import tensorflow as tf
# various initialization variables
BATCH_SIZE = 128
N_FEATURES = 9
# This function accepts a tensor of size [batch_size, 2 ,record_size]
# and segments in into two tensors of size [batch_size, record] along the second dimension
# IMPORTANT: to be executed within an active session
def segment_batch(batch_p, batch_size, n_input):
batch_xs = tf.slice(batch_p, [0,0,0], [batch_size,1,n_input]) # optical data tensor
batch_ys = tf.slice(batch_p, [0,1,0], [batch_size,1,n_input]) # GT data tensor
optical = tf.reshape([batch_xs], [batch_size, n_input])
gt = tf.reshape([batch_ys], [batch_size, n_input])
return [optical, gt]
def batch_generator(filenames, record_size, batch_size):
""" filenames is the list of files you want to read from.
record_bytes: The size of a record in bytes
batch_size: The size a data batch (examples/batch)
"""
filename_queue = tf.train.string_input_producer(filenames)
reader = tf.FixedLengthRecordReader(record_bytes=2*record_size) # record size is double the value given (optical + ground truth images)
_, value = reader.read(filename_queue)
# read in the data (UINT8)
content = tf.decode_raw(value, out_type=tf.uint8)
# The bytes read represent the image, which we reshape
# from [depth * height * width] to [depth, height, width].
# read optical data slice
depth_major = tf.reshape(
tf.strided_slice(content, [0],
[record_size]),
[1, 64, 64])
# read GT (ground truth) data slice
depth_major1 = tf.reshape(
tf.strided_slice(content, [record_size],
[2*record_size]),
[1, 64, 64])
# Optical data
# Convert from [depth, height, width] to [height, width, depth].
uint8image = tf.transpose(depth_major, [1, 2, 0])
uint8image = tf.reshape(uint8image, [record_size]) # reshape into a single-dimensional vector
uint8image = tf.cast(uint8image, tf.float32) # cast into a float32
uint8image = uint8image/255 # normalize
# Ground Truth data
# Convert from [depth, height, width] to [height, width, depth].
gt_image = tf.transpose(depth_major1, [1, 2, 0])
gt_image = tf.reshape(gt_image, [record_size]) # reshape into a single-dimensional vector
gt_image = tf.cast(gt_image, tf.float32) # cast into a float32
gt_image = gt_image/255 # normalize
# stack them into a single features tensor
features = tf.stack([uint8image, gt_image])
# minimum number elements in the queue after a dequeue, used to ensure
# that the samples are sufficiently mixed
# I think 10 times the BATCH_SIZE is sufficient
min_after_dequeue = 10 * batch_size
# the maximum number of elements in the queue
capacity = 20 * batch_size
# shuffle the data to generate BATCH_SIZE sample pairs
data_batch = tf.train.shuffle_batch([features], batch_size=batch_size,
capacity=capacity, min_after_dequeue=min_after_dequeue)
return data_batch
これは私の実装の主なコードです:の形状を微調整をたくさんした後
from __future__ import print_function
# Various initialization variables
DATA_PATH_OPTICAL_TRAIN = 'data/building_ground_truth_for_training.bin'
DATA_PATH_EVAL = 'data/building_ground_truth_for_eval.bin'
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import time
# custom imports
import data_reader2
# Parameters
learning_rate = 0.001
training_iters = 200000
batch_size = 128
epochs = 10
display_step = 10
rows = 64
cols = 64
# Network Parameters
n_input = 4096 # optical image data (img shape: 64*64)
n_classes = 4096 # output is an image of same resolution as initial image
dropout = 0.75 # Dropout, probability to keep units
# input data parameters
record_size = 64**2
total_bytes_of_optical_binary_file = 893329408 # total size of binary file containing training data ([64z64 optical] [64x64 GT])
# create the data batches (queue)
# Accepts two parameters. The tensor containing the binary files and the size of a record
data_batch = data_reader2.batch_generator([DATA_PATH_OPTICAL_TRAIN],record_size, batch_size) # train set
data_batch_eval = data_reader2.batch_generator([DATA_PATH_EVAL],record_size, batch_size) # train set
##############################################################
######################### FUNCTIONS ##########################
##############################################################
# extract optical array from list
# A helper function. Data returned from segment_batch is a list which contains two arrays.
# The first array contains the optical data while the second contains the ground truth data
def extract_optical_from_list(full_batch):
optical = full_batch[0] # extract array from list
return optical
# extract ground truth array from list
# A helper function. Data returned from segment_batch is a list which contains two arrays.
# The first array contains the optical data while the second contains the ground truth data
def extract_gt_from_list(full_batch):
gt = full_batch[1] # extract array from list
return gt
# This function accepts a tensor of size [batch_size, 2 ,record_size]
# and segments in into two tensors of size [batch_size, record] along the second dimension
# IMPORTANT: to be executed within an active session
def segment_batch(batch_p):
batch_xs = tf.slice(batch_p, [0,0,0], [batch_size,1,n_input]) # optical data tensor
batch_ys = tf.slice(batch_p, [0,1,0], [batch_size,1,n_input]) # GT data tensor
optical = tf.reshape([batch_xs], [batch_size, n_input])
gt = tf.reshape([batch_ys], [batch_size, n_input])
return [optical, gt]
# Create some wrappers for simplicity
def conv2d(x, W, b, strides=1):
# Conv2D wrapper, with bias and relu activation
x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
x = tf.nn.bias_add(x, b)
return tf.nn.relu(x)
def maxpool2d(x, k=2):
# MaxPool2D wrapper
return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1],
padding='SAME')
# Create model
def conv_net(x, weights, biases, dropout):
# Reshape input picture into 64x64 subimages [rows, rows, cols, channels]
x1 = tf.reshape(x, shape=[-1, rows, cols, 1]) # this is the 4-dimensional that tf.conv2D expects as Input
# Convolution Layer
conv1 = conv2d(x1, weights['wc1'], biases['bc1'])
# Max Pooling (down-sampling)
conv1 = maxpool2d(conv1, k=2)
# Convolution Layer
conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])
# Max Pooling (down-sampling)
conv2 = maxpool2d(conv2, k=2)
# Fully connected layer
# Reshape conv2 output to fit fully connected layer input
fc1 = tf.reshape(conv2, [-1, weights['wd1'].get_shape().as_list()[0]])
fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
fc1 = tf.nn.relu(fc1)
# Apply Dropout
#fc1 = tf.nn.dropout(fc1, dropout)
# Output image (edge), prediction
out = tf.add(tf.matmul(fc1, weights['out']), biases['out'])
# Add print operation
out = tf.Print(out, [out], message="This is out: ")
return [out, x]
# Store layers weight & bias
weights = {
# 5x5 conv, 1 input, 32 outputs
'wc1': tf.Variable(tf.random_normal([5, 5, 1, 32])),
# 5x5 conv, 32 inputs, 64 outputs
'wc2': tf.Variable(tf.random_normal([5, 5, 32, 64])),
# fully connected, 7*7*64 inputs, 1024 outputs
'wd1': tf.Variable(tf.random_normal([16*16*64, 1024])),
# 1024 inputs, 10 outputs (class prediction)
'out': tf.Variable(tf.random_normal([1024, n_classes]))
}
biases = {
'bc1': tf.Variable(tf.random_normal([32])),
'bc2': tf.Variable(tf.random_normal([64])),
'bd1': tf.Variable(tf.random_normal([1024])),
'out': tf.Variable(tf.random_normal([n_classes]))
}
####################################################################
##################### PLACEHOLDERS #################################
####################################################################
# tf Graph input (only pictures)
X = tf.placeholder_with_default(extract_optical_from_list(segment_batch(data_batch)), [batch_size, n_input])
####################################################################
##################### END OF PLACEHOLDERS ##########################
####################################################################
# tf Graph input
keep_prob = tf.Variable(dropout) #dropout (keep probability)
# Construct model
pred = conv_net(extract_optical_from_list(X), weights, biases, keep_prob) # x[0] is the optical data
y_true = extract_gt_from_list(extract_gt_from_list(X)) # y_true is the ground truth data
# Define loss and optimizer
cost = tf.reduce_mean(tf.pow(y_true - pred[0], 2))
optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(cost)
# Initializing the variables
init = tf.global_variables_initializer()
# Launch the graph
with tf.Session() as sess:
sess.run(init)
step = 1
# Keep training until reach max iterations
while step * batch_size < training_iters:
print("Optimizing")
sess.run(optimizer)
print("Iter " + str(step*batch_size))
step += 1
print("Optimization Finished!")
テンソル私は構文エラーを修正することができました。残念ながら、グラフの最適化部分の実行を開始したばかりです。私はこれをデバッグする方法がないので(Tensorflowデバッガの使用に関する非常に不十分な情報を見つけました)、私は間違っていたものを失ってしまいました! Tensorflowでより多くの経験を積んだ人がこのコードの何が間違っているのかを指摘できれば、私は多くの助けになります。
おかげで、事前
"それはちょうどハングする"とはどういう意味ですか?何も起こりません?あなたはどれくらい待ったのですか?キューは最初に初期化され、_capacity_イメージで埋められる必要があります。 「デバッグ」のために、これが本当にエラーであるかどうかを確認するために、容量をより少ない数に減らそうとすることができます。 – aseipel