CNNを使ってテンソルフローで訓練されたモデルを使用してバウンディングボックスでオブジェクトを検出する方法

TensorFlowでCNNを使い、精度90％以上のモデルを作成しました。モデル自体はうまく動作しますが、この訓練済みモデルを使ってバウンディングボックスでオブジェクトを検出する方法がわかりません。私のモデルには多くのクラスがあり、各クラス名に対応するラベルが1つずつあります。SSDを使えば可能だという記事をいくつか読みましたが、実際にどのように動作するのかは理解できていません。私のCNNは以下のとおりです。

def cnn_model_fn(features, labels, mode):
    """Model function for a two-convolution CNN classifier (tf.estimator).

    Args:
        features: dict whose "x" entry holds the input batch; it is reshaped
            to [-1, 28, 28, 1].
        labels: class indices; cast to int32 and one-hot encoded with
            depth 10. Not used in PREDICT mode.
        mode: a tf.estimator.ModeKeys value selecting the returned spec.

    Returns:
        A tf.estimator.EstimatorSpec carrying predictions (PREDICT),
        loss and train_op (TRAIN), or loss and accuracy metric (otherwise).
    """
    # Input layer
    input_layer = tf.reshape(features["x"], [-1, 28, 28, 1])

    # Convolutional layer 1
    conv1 = tf.layers.conv2d(
        inputs=input_layer,
        filters=32,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu)

    # Pooling layer 1
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)

    # Convolutional layer 2
    conv2 = tf.layers.conv2d(
        inputs=pool1,
        filters=64,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu)

    # Pooling layer 2
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)

    # Dense layer: flatten the 7x7x64 feature maps before the fully
    # connected layer.
    pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])
    dense = tf.layers.dense(inputs=pool2_flat, units=1024, activation=tf.nn.relu)

    # Dropout is only active while training.
    dropout = tf.layers.dropout(
        inputs=dense,
        rate=0.4,
        training=mode == tf.estimator.ModeKeys.TRAIN)

    # Logits layer
    logits = tf.layers.dense(inputs=dropout, units=10)

    predictions = {
        "classes": tf.argmax(input=logits, axis=1),
        # The explicit name lets a logging hook look the tensor up by name.
        "probabilities": tf.nn.softmax(logits, name="softmax_tensor"),
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Calculate loss (used by both TRAIN and EVAL)
    onehot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32), depth=10)
    loss = tf.losses.softmax_cross_entropy(
        onehot_labels=onehot_labels, logits=logits)

    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
        train_op = optimizer.minimize(
            loss=loss,
            global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    eval_metric_ops = {
        "accuracy": tf.metrics.accuracy(
            labels=labels, predictions=predictions["classes"]),
    }

    return tf.estimator.EstimatorSpec(
        mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)

そして、main関数でアプリを実行します。

def main(unused_argv):
    """Train the CNN classifier on images from disk, then evaluate it.

    Args:
        unused_argv: command-line arguments; ignored.
    """
    # Load training and eval data
    train_data_dir = "W:/Projects/AutoDrive/Training"
    test_data_dir = "W:/Projects/AutoDrive/Testing"

    images, labels = load_data(train_data_dir)
    test_images, test_labels = load_data(test_data_dir)

    print("Labels: {0} \nImages: {1}".format(len(set(labels)), len(images)))

    for image in images[:5]:
        print("shape: {0}, min: {1}, max: {2}".format(image.shape, image.min(), image.max()))

    images = [skimage.transform.resize(image, (28, 28, 1)) for image in images]

    for image in images[:5]:
        print("shape: {0}, min: {1}, max: {2}".format(image.shape, image.min(), image.max()))

    images = np.asarray(images, dtype=np.float32)
    labels = np.asarray(labels, dtype=np.int32)

    # The evaluation set must go through the same resize and array
    # conversion as the training set: numpy_input_fn expects numpy arrays,
    # and the model reshapes its input to 28x28x1.
    test_images = np.asarray(
        [skimage.transform.resize(image, (28, 28, 1)) for image in test_images],
        dtype=np.float32)
    test_labels = np.asarray(test_labels, dtype=np.int32)

    # Create the Estimator
    TSRecognition_classifier = tf.estimator.Estimator(
        model_fn=cnn_model_fn, model_dir="/tmp/TSRecognition_convnet_model")

    # Set up logging for predictions
    # Log the values in the "softmax_tensor" tensor with label "probabilities"
    tensors_to_log = {"probabilities": "softmax_tensor"}
    logging_hook = tf.train.LoggingTensorHook(
        tensors=tensors_to_log, every_n_iter=50)

    # Train the model
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": images},
        y=labels,
        batch_size=100,
        num_epochs=None,
        shuffle=True)

    TSRecognition_classifier.train(
        input_fn=train_input_fn,
        steps=20000,
        hooks=[logging_hook])

    # Evaluate the model and print results
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": test_images},
        y=test_labels,
        num_epochs=1,
        shuffle=False)
    eval_results = TSRecognition_classifier.evaluate(input_fn=eval_input_fn)
    print(eval_results)

完全なコードをご覧になりたい場合は、以下のとおりです：

from __future__ import absolute_import 
from __future__ import division 
from __future__ import print_function 

import numpy as np 
import tensorflow as tf 
import os 
import skimage.data 
import skimage.transform 
import matplotlib 
import matplotlib.pyplot as plt 

# Set TensorFlow's logging verbosity to INFO.
tf.logging.set_verbosity(tf.logging.INFO) 

def load_data(data_dir):
    """Read every ".ppm" image found one level below ``data_dir``.

    Each immediate sub-directory of ``data_dir`` is scanned for files whose
    name ends with ".ppm". The sub-directory name, converted with ``int``,
    is used as the label of every image read from it.

    Returns:
        A pair ``(images, labels)`` of parallel lists.
    """
    loaded_images = []
    loaded_labels = []

    for entry in os.listdir(data_dir):
        class_dir = os.path.join(data_dir, entry)
        # Plain files directly under data_dir are not class folders.
        if not os.path.isdir(class_dir):
            continue

        for file_name in os.listdir(class_dir):
            if not file_name.endswith(".ppm"):
                continue
            image_path = os.path.join(class_dir, file_name)
            loaded_images.append(skimage.data.imread(image_path))
            loaded_labels.append(int(entry))

    return loaded_images, loaded_labels

def display_images_and_labels(images, labels):
    """Show the first image of every distinct label in one figure.

    Args:
        images: sequence of images, parallel to ``labels``.
        labels: list of labels; ``index`` and ``count`` are called on it.

    Each subplot is titled with the label and the number of times that
    label occurs. Subplots are placed on a fixed 8x8 grid.
    """
    unique_labels = set(labels)
    plt.figure(figsize=(15, 15))
    for position, label in enumerate(unique_labels, start=1):
        # Pick the first image carrying this label.
        image = images[labels.index(label)]
        plt.subplot(8, 8, position)
        plt.axis('off')
        plt.title("Label {0} ({1})".format(label, labels.count(label)))
        plt.imshow(image)
    # Show the figure once, after every subplot has been drawn; calling
    # show() inside the loop displays the figure once per label.
    plt.show()

def cnn_model_fn(features, labels, mode):
    """Build the CNN classifier graph for a tf.estimator.Estimator.

    The network is conv(32, 5x5) -> max-pool -> conv(64, 5x5) -> max-pool ->
    dense(1024) -> dropout -> dense(num_classes).

    Args:
        features: dict whose "x" entry holds the input batch; it is reshaped
            to [-1, 28, 28, 1].
        labels: class indices; cast to int32 and one-hot encoded. Not used
            in PREDICT mode.
        mode: a tf.estimator.ModeKeys value selecting the returned spec.

    Returns:
        A tf.estimator.EstimatorSpec carrying predictions (PREDICT),
        loss and train_op (TRAIN), or loss and accuracy metric (otherwise).
    """
    # Width of the logits layer and depth of the one-hot labels; the two
    # must agree, so the value is named once here.
    # NOTE(review): confirm this matches the number of label folders on disk.
    num_classes = 10

    # Input layer
    input_layer = tf.reshape(features["x"], [-1, 28, 28, 1])

    # Convolutional layer 1
    conv1 = tf.layers.conv2d(
        inputs=input_layer,
        filters=32,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu)

    # Pooling layer 1
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)

    # Convolutional layer 2
    conv2 = tf.layers.conv2d(
        inputs=pool1,
        filters=64,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu)

    # Pooling layer 2
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)

    # Dense layer: flatten the 7x7x64 feature maps first.
    pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])
    dense = tf.layers.dense(inputs=pool2_flat, units=1024, activation=tf.nn.relu)

    # Dropout is only active while training.
    is_training = mode == tf.estimator.ModeKeys.TRAIN
    dropout = tf.layers.dropout(inputs=dense, rate=0.4, training=is_training)

    # Logits layer
    logits = tf.layers.dense(inputs=dropout, units=num_classes)

    predictions = {
        "classes": tf.argmax(input=logits, axis=1),
        # The explicit name lets a logging hook look the tensor up by name.
        "probabilities": tf.nn.softmax(logits, name="softmax_tensor"),
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Calculate loss (used by both TRAIN and EVAL)
    onehot_labels = tf.one_hot(
        indices=tf.cast(labels, tf.int32), depth=num_classes)
    loss = tf.losses.softmax_cross_entropy(
        onehot_labels=onehot_labels, logits=logits)

    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
        train_op = optimizer.minimize(
            loss=loss,
            global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    eval_metric_ops = {
        "accuracy": tf.metrics.accuracy(
            labels=labels, predictions=predictions["classes"]),
    }

    return tf.estimator.EstimatorSpec(
        mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)


def main(unused_argv):
    """Train the CNN classifier on images from disk, then evaluate it.

    Loads the training and testing image folders, resizes every image to
    28x28x1, trains an Estimator built from ``cnn_model_fn`` for 20000
    steps, and prints the evaluation results.

    Args:
        unused_argv: command-line arguments; ignored.
    """
    # Load training and eval data
    train_data_dir = "W:/Projects/AutoDrive/Training"
    test_data_dir = "W:/Projects/AutoDrive/Testing"

    images, labels = load_data(train_data_dir)
    test_images, test_labels = load_data(test_data_dir)

    print("Labels: {0} \nImages: {1}".format(len(set(labels)), len(images)))

    for image in images[:5]:
        print("shape: {0}, min: {1}, max: {2}".format(image.shape, image.min(), image.max()))

    target_shape = (28, 28, 1)
    images = [skimage.transform.resize(image, target_shape) for image in images]

    for image in images[:5]:
        print("shape: {0}, min: {1}, max: {2}".format(image.shape, image.min(), image.max()))

    images = np.asarray(images, dtype=np.float32)
    labels = np.asarray(labels, dtype=np.int32)

    # The evaluation set needs the same preprocessing as the training set.
    # Previously the raw Python lists returned by load_data were handed to
    # numpy_input_fn without being resized or converted to arrays.
    test_images = np.asarray(
        [skimage.transform.resize(image, target_shape) for image in test_images],
        dtype=np.float32)
    test_labels = np.asarray(test_labels, dtype=np.int32)

    # Create the Estimator
    TSRecognition_classifier = tf.estimator.Estimator(
        model_fn=cnn_model_fn, model_dir="/tmp/TSRecognition_convnet_model")

    # Set up logging for predictions
    # Log the values in the "softmax_tensor" tensor with label "probabilities"
    tensors_to_log = {"probabilities": "softmax_tensor"}
    logging_hook = tf.train.LoggingTensorHook(
        tensors=tensors_to_log, every_n_iter=50)

    # Train the model
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": images},
        y=labels,
        batch_size=100,
        num_epochs=None,
        shuffle=True)

    TSRecognition_classifier.train(
        input_fn=train_input_fn,
        steps=20000,
        hooks=[logging_hook])

    # Evaluate the model and print results
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": test_images},
        y=test_labels,
        num_epochs=1,
        shuffle=False)
    eval_results = TSRecognition_classifier.evaluate(input_fn=eval_input_fn)
    print(eval_results)

# Script entry point: hand control to TensorFlow's app runner, naming the
# entry function explicitly instead of relying on the default lookup.
if __name__ == "__main__":
    tf.app.run(main=main)

さらに、役に立ちそうなビデオもいくつか見ましたが、どれも1種類のオブジェクトだけを訓練する方法しか扱っていませんでした。何か良いアイデアがあれば教えてください。

出典

2017-12-02 buiquangdinh

CNNには、バウンディングボックスを出力できるものと、入力画像を分類するだけのものがあります。あなたのモデルは後者のタイプです。TensorFlowでバウンディングボックスを扱いたい場合は、Object Detection APIを使って、マルチクラスのSSDやFaster R-CNNを構築できます。https://github.com/tensorflow/models/tree/master/research/object_detection

出典

2017-12-05 07:09:44

CNNを使ってテンソルフローで訓練されたモデルを使用してバウンディングボックスでオブジェクトを検出する方法

答えて

関連する問題