TensorFlow Slim image classifier: the trained model always predicts the same class when evaluating. I followed https://kwotsin.github.io/tech/2017/02/11/transfer-learning.html for the training code:
import os
import time
import tensorflow as tf
from tensorflow.python.platform import tf_logging as logging
from tensorflow.contrib.framework import get_or_create_global_step
from inception_resnet_v2 import inception_resnet_v2, inception_resnet_v2_arg_scope
import inception_preprocessing
slim = tf.contrib.slim
dataset_dir = './data'
log_dir = './log'
checkpoint_file = './inception_resnet_v2_2016_08_30.ckpt'
image_size = 299
num_classes = 21
labels_file = './labels.txt'
labels = open(labels_file, 'r')
labels_to_name = {}
for line in labels:
label, string_name = line.split(':')
string_name = string_name[:-1]
labels_to_name[int(label)] = string_name
file_pattern = 'test_%s_*.tfrecord'
items_to_descriptions = {
'image': 'A 3-channel RGB coloured product image',
'label': 'A label from one of the 21 classes'
}
num_epochs = 10
batch_size = 16
initial_learning_rate = 0.001
learning_rate_decay_factor = 0.7
num_epochs_before_decay = 4
def get_split(split_name, dataset_dir, file_pattern=file_pattern, file_pattern_for_counting='products'):
if split_name not in ['train', 'validation']:
raise ValueError(
'The split_name %s is not recognized. Please input either train or validation as the split_name' % (
split_name))
file_pattern_path = os.path.join(dataset_dir, file_pattern % (split_name))
num_samples = 0
file_pattern_for_counting = file_pattern_for_counting + '_' + split_name
tfrecords_to_count = [os.path.join(dataset_dir, file) for file in os.listdir(dataset_dir) if
file.startswith(file_pattern_for_counting)]
for tfrecord_file in tfrecords_to_count:
for record in tf.python_io.tf_record_iterator(tfrecord_file):
num_samples += 1
test = num_samples
reader = tf.TFRecordReader
keys_to_features = {
'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
'image/format': tf.FixedLenFeature((), tf.string, default_value='jpg'),
'image/class/label': tf.FixedLenFeature(
[], tf.int64, default_value=tf.zeros([], dtype=tf.int64)),
}
items_to_handlers = {
'image': slim.tfexample_decoder.Image(),
'label': slim.tfexample_decoder.Tensor('image/class/label'),
}
decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features, items_to_handlers)
labels_to_name_dict = labels_to_name
dataset = slim.dataset.Dataset(
data_sources=file_pattern_path,
decoder=decoder,
reader=reader,
num_readers=4,
num_samples=num_samples,
num_classes=num_classes,
labels_to_name=labels_to_name_dict,
items_to_descriptions=items_to_descriptions)
return dataset
def load_batch(dataset, batch_size, height=image_size, width=image_size, is_training=True):
'''
Loads a batch for training.
INPUTS:
- dataset(Dataset): a Dataset class object that is created from the get_split function
- batch_size(int): determines how big of a batch to train
- height(int): the height of the image to resize to during preprocessing
- width(int): the width of the image to resize to during preprocessing
- is_training(bool): to determine whether to perform a training or evaluation preprocessing
OUTPUTS:
- images(Tensor): a Tensor of the shape (batch_size, height, width, channels) that contain one batch of images
- labels(Tensor): the batch's labels with the shape (batch_size,) (requires one_hot_encoding).
'''
# First create the data_provider object
data_provider = slim.dataset_data_provider.DatasetDataProvider(
dataset,
common_queue_capacity=24 + 3 * batch_size,
common_queue_min=24)
# Obtain the raw image using the get method
raw_image, label = data_provider.get(['image', 'label'])
# Perform the correct preprocessing for this image depending if it is training or evaluating
image = inception_preprocessing.preprocess_image(raw_image, height, width, is_training)
# As for the raw images, we just do a simple reshape to batch it up
raw_image = tf.expand_dims(raw_image, 0)
raw_image = tf.image.resize_nearest_neighbor(raw_image, [height, width])
raw_image = tf.squeeze(raw_image)
# Batch up the image by enqueing the tensors internally in a FIFO queue and dequeueing many elements with tf.train.batch.
images, raw_images, labels = tf.train.batch(
[image, raw_image, label],
batch_size=batch_size,
num_threads=4,
capacity=4 * batch_size,
allow_smaller_final_batch=True)
return images, raw_images, labels
def run():
# Create the log directory here. Doing this at import time would create it unnecessarily.
if not os.path.exists(log_dir):
os.mkdir(log_dir)
# ======================= TRAINING PROCESS =========================
# Now we start to construct the graph and build our model
with tf.Graph().as_default() as graph:
tf.logging.set_verbosity(tf.logging.INFO) # Set the verbosity to INFO level
# First create the dataset and load one batch
dataset = get_split('train', dataset_dir, file_pattern=file_pattern)
images, _, labels = load_batch(dataset, batch_size=batch_size)
# Know the number steps to take before decaying the learning rate and batches per epoch
num_batches_per_epoch = int(dataset.num_samples/batch_size)
num_steps_per_epoch = num_batches_per_epoch # Because one step is one batch processed
decay_steps = int(num_epochs_before_decay * num_steps_per_epoch)
# Create the model inference
with slim.arg_scope(inception_resnet_v2_arg_scope()):
logits, end_points = inception_resnet_v2(images, num_classes=dataset.num_classes, is_training=True)
# Define the scopes that you want to exclude for restoration
exclude = ['InceptionResnetV2/Logits', 'InceptionResnetV2/AuxLogits']
variables_to_restore = slim.get_variables_to_restore(exclude=exclude)
# Perform one-hot-encoding of the labels (Try one-hot-encoding within the load_batch function!)
one_hot_labels = slim.one_hot_encoding(labels, dataset.num_classes)
# Performs the equivalent to tf.nn.sparse_softmax_cross_entropy_with_logits but enhanced with checks
loss = tf.losses.softmax_cross_entropy(onehot_labels=one_hot_labels, logits=logits)
total_loss = tf.losses.get_total_loss() # obtain the regularization losses as well
# Create the global step for monitoring the learning_rate and training.
global_step = get_or_create_global_step()
# Define your exponentially decaying learning rate
lr = tf.train.exponential_decay(
learning_rate=initial_learning_rate,
global_step=global_step,
decay_steps=decay_steps,
decay_rate=learning_rate_decay_factor,
staircase=True)
# Now we can define the optimizer that takes on the learning rate
optimizer = tf.train.AdamOptimizer(learning_rate=lr)
# Create the train_op.
train_op = slim.learning.create_train_op(total_loss, optimizer)
# State the metrics that you want to track. The predictions we get here are not one-hot encoded.
predictions = tf.argmax(end_points['Predictions'], 1)
probabilities = end_points['Predictions']
accuracy, accuracy_update = tf.contrib.metrics.streaming_accuracy(predictions, labels)
metrics_op = tf.group(accuracy_update, probabilities)
# Now finally create all the summaries you need to monitor and group them into one summary op.
tf.summary.scalar('losses/Total_Loss', total_loss)
tf.summary.scalar('accuracy', accuracy)
tf.summary.scalar('learning_rate', lr)
my_summary_op = tf.summary.merge_all()
# Now we create a training step function that runs the train_op and metrics_op, and updates the global_step concurrently.
def train_step(sess, train_op, global_step):
'''
Simply runs a session for the three arguments provided and gives a logging on the time elapsed for each global step
'''
# Check the time for each sess run
start_time = time.time()
total_loss, global_step_count, _ = sess.run([train_op, global_step, metrics_op])
time_elapsed = time.time() - start_time
# Run the logging to print some results
logging.info('global step %s: loss: %.4f (%.2f sec/step)', global_step_count, total_loss, time_elapsed)
return total_loss, global_step_count
# Now we create a saver function that actually restores the variables from a checkpoint file in a sess
saver = tf.train.Saver(variables_to_restore)
def restore_fn(sess):
return saver.restore(sess, checkpoint_file)
# Define your supervisor for running a managed session. Do not run the summary_op automatically or else it will consume too much memory
sv = tf.train.Supervisor(logdir=log_dir, summary_op=None, init_fn=restore_fn)
# Run the managed session
with sv.managed_session() as sess:
for step in xrange(num_steps_per_epoch * num_epochs):
# At the start of every epoch, show the vital information:
if step % num_batches_per_epoch == 0:
logging.info('Epoch %s/%s', step/num_batches_per_epoch + 1, num_epochs)
learning_rate_value, accuracy_value = sess.run([lr, accuracy])
logging.info('Current Learning Rate: %s', learning_rate_value)
logging.info('Current Streaming Accuracy: %s', accuracy_value)
# optionally, print your logits and predictions for a sanity check that things are going fine.
logits_value, probabilities_value, predictions_value, labels_value = sess.run(
[logits, probabilities, predictions, labels])
print 'logits: \n', logits_value
print 'Probabilities: \n', probabilities_value
print 'predictions: \n', predictions_value
print 'Labels: \n', labels_value
# Log the summaries every 10 step.
if step % 10 == 0:
loss, _ = train_step(sess, train_op, sv.global_step)
summaries = sess.run(my_summary_op)
sv.summary_computed(sess, summaries)
# If not, simply run the training step
else:
loss, _ = train_step(sess, train_op, sv.global_step)
# We log the final training loss and accuracy
logging.info('Final Loss: %s', loss)
logging.info('Final Accuracy: %s', sess.run(accuracy))
# Once all the training has been done, save the log files and checkpoint model
logging.info('Finished training! Saving model to disk now.')
sv.saver.save(sess, sv.save_path, global_step=sv.global_step)
This code seems to work; training on some sample data got me up to 94% accuracy. Below is the evaluation code I am running:
# (This script reuses the imports and the get_split/load_batch definitions from the training code above.)
import matplotlib.pyplot as plt
log_dir = './log'
log_eval = './log_eval_test'
dataset_dir = './data'
batch_size = 10
num_epochs = 1
checkpoint_file = tf.train.latest_checkpoint('./')
def run():
if not os.path.exists(log_eval):
os.mkdir(log_eval)
with tf.Graph().as_default() as graph:
tf.logging.set_verbosity(tf.logging.INFO)
dataset = get_split('train', dataset_dir)
images, raw_images, labels = load_batch(dataset, batch_size=batch_size, is_training=False)
num_batches_per_epoch = dataset.num_samples/batch_size
num_steps_per_epoch = num_batches_per_epoch
with slim.arg_scope(inception_resnet_v2_arg_scope()):
logits, end_points = inception_resnet_v2(images, num_classes=dataset.num_classes, is_training=False)
variables_to_restore = slim.get_variables_to_restore()
saver = tf.train.Saver(variables_to_restore)
def restore_fn(sess):
return saver.restore(sess, checkpoint_file)
predictions = tf.argmax(end_points['Predictions'], 1)
accuracy, accuracy_update = tf.contrib.metrics.streaming_accuracy(predictions, labels)
metrics_op = tf.group(accuracy_update)
global_step = get_or_create_global_step()
global_step_op = tf.assign(global_step, global_step + 1)
def eval_step(sess, metrics_op, global_step):
'''
Simply takes in a session, runs the metrics op and some logging information.
'''
start_time = time.time()
_, global_step_count, accuracy_value = sess.run([metrics_op, global_step_op, accuracy])
time_elapsed = time.time() - start_time
logging.info('Global Step %s: Streaming Accuracy: %.4f (%.2f sec/step)', global_step_count, accuracy_value,
time_elapsed)
return accuracy_value
tf.summary.scalar('Validation_Accuracy', accuracy)
my_summary_op = tf.summary.merge_all()
sv = tf.train.Supervisor(logdir=log_eval, summary_op=None, saver=None, init_fn=restore_fn)
with sv.managed_session() as sess:
for step in xrange(num_steps_per_epoch * num_epochs):
sess.run(sv.global_step)
if step % num_batches_per_epoch == 0:
logging.info('Epoch: %s/%s', step/num_batches_per_epoch + 1, num_epochs)
logging.info('Current Streaming Accuracy: %.4f', sess.run(accuracy))
if step % 10 == 0:
eval_step(sess, metrics_op=metrics_op, global_step=sv.global_step)
summaries = sess.run(my_summary_op)
sv.summary_computed(sess, summaries)
else:
eval_step(sess, metrics_op=metrics_op, global_step=sv.global_step)
logging.info('Final Streaming Accuracy: %.4f', sess.run(accuracy))
raw_images, labels, predictions = sess.run([raw_images, labels, predictions])
for i in range(10):
image, label, prediction = raw_images[i], labels[i], predictions[i]
prediction_name, label_name = dataset.labels_to_name[prediction], dataset.labels_to_name[label]
text = 'Prediction: %s \n Ground Truth: %s' % (prediction_name, label_name)
img_plot = plt.imshow(image)
plt.title(text)
img_plot.axes.get_yaxis().set_ticks([])
img_plot.axes.get_xaxis().set_ticks([])
plt.show()
logging.info(
'Model evaluation has completed! Visit TensorBoard for more information regarding your evaluation.')
So after training the model and reaching 94% accuracy, I tried to evaluate it. On evaluation I get 0-1% accuracy the whole time. When I investigated this, I found that it predicts the same class every time:
labels: [7, 11, 5, 1, 20, 0, 18, 1, 0, 7]
predictions: [10, 10, 10, 10, 10, 10, 10, 10, 10, 10]
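For what it's worth, a quick way to confirm this collapse onto one class is to tally predictions over several batches; a minimal sketch, assuming it runs inside the eval script's managed session where sess and predictions already exist:
import collections
counts = collections.Counter()
for _ in xrange(20):                      # tally predictions over 20 batches
    counts.update(sess.run(predictions))  # predictions is the argmax tensor defined above
print counts                              # e.g. Counter({10: 200}) would confirm a single-class collapse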
Can anyone help me see where I might be going wrong?
EDIT:
[Images: TensorBoard accuracy and loss from training, and TensorBoard accuracy from evaluation]
EDIT:
I still have not been able to solve this issue. I thought there might be a problem with how I am restoring the graph in my eval script, so I tried using
saver = tf.train.import_meta_graph('/log/model.ckpt.meta')
def restore_fn(sess):
return saver.restore(sess, checkpoint_file)
instead of
variables_to_restore = slim.get_variables_to_restore()
saver = tf.train.Saver(variables_to_restore)
def restore_fn(sess):
return saver.restore(sess, checkpoint_file)
to restore the model, but it just takes a very long time and eventually errors out. I then tried V1 of the saver writer (saver = tf.train.Saver(variables_to_restore, write_version=saver_pb2.SaverDef.V1)), but could not load this checkpoint.
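To rule out the checkpoint file itself, here is a minimal sketch (TF 1.x) that lists the variables a checkpoint actually stores, using tf.train.NewCheckpointReader; checkpoint_file is the same checkpoint prefix used in the eval script:
reader = tf.train.NewCheckpointReader(checkpoint_file)
var_to_shape = reader.get_variable_to_shape_map()  # {variable name: shape}
for name in sorted(var_to_shape):
    print name, var_to_shape[name]  # the InceptionResnetV2/... variables should all appear here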
I also tried running my eval script on the same data I trained on.
Finally, I re-cloned the repo from the URL above and trained on the same dataset used in the tutorial, and I still get 0-3% accuracy when evaluating, even though training reaches 84%. Also, when I resume training, the accuracy continues from where it left off, so my checkpoint must contain the correct information. I feel like something is not working correctly when I restore the model.
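One way to test that restore suspicion directly is to compare a variable's value in the checkpoint with the value the session holds after restore_fn has run. A sketch, assuming it executes inside the eval session after restoring; the variable name is only an illustrative one from the InceptionResnetV2 scope:
import numpy as np
reader = tf.train.NewCheckpointReader(checkpoint_file)
name = 'InceptionResnetV2/Conv2d_1a_3x3/weights'  # illustrative variable name
ckpt_value = reader.get_tensor(name)              # value stored on disk
graph_var = [v for v in tf.global_variables() if v.op.name == name][0]
live_value = sess.run(graph_var)                  # value actually in the graph after restore
print 'restore matches checkpoint:', np.allclose(ckpt_value, live_value)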
I am building a 21-class classifier, and my dataset has the same number of images for each class. Also, when I run training, the code prints out the labels and predictions, so I can see it predicting across multiple classes. From the training output: 'Sample predictions: [17 13 7 6 13 20 19 3 15 0 18 15 10 11 19 3] Labels: [17 13 7 6 13 20 19 3 15 0 18 15 10 11 19 3]' – Neil
How much data do you have? How long are you training the model for? Can you post the results of the epochs where you reached 94% accuracy? Are you sure you are loading the right model? I would try entering the full path in checkpoint_file, like in the training code –
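(For example, something like the following, assuming the training checkpoints were written to ./log as in the training script above:)
checkpoint_file = tf.train.latest_checkpoint(os.path.abspath('./log'))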
I am working with a small subset of my real data, with 20 images per class. Unfortunately, the terminal output doesn't show the predictions, but – Neil