テンソルフローカスタム勾配

TensorFlowでカスタムレイヤーを作成しようとしています。まずは単純な練習用の例として、入力をそのまま出力する「コピーレイヤー」から始めることにしました。試行錯誤の末、勾配が正しい値を伝播しているように見えるところまでは到達しました。しかし、2回目の反復で値がNaNになってしまいます。単純なミスかもしれませんが、今のところ原因を見つけられていません。 テンソルフローカスタム勾配

全般的に、質問は2つあります：

1. ここでの問題点と、その解決方法がわかる方はいますか？

2. TensorFlowのセッションをデバッグするには、どのような方法が適していますか？

copy_op.cc

#include "tensorflow/core/framework/op.h" 
#include "tensorflow/core/framework/op_kernel.h" 
#include <stdio.h> 

namespace tensorflow { 



// Short aliases for the Eigen device types that can be passed as the `Device`
// template argument of the kernels in this file.
using CPUDevice = Eigen::ThreadPoolDevice;
using GPUDevice = Eigen::GpuDevice;

/// Kernel for the "MyCopy" op: allocates output 0 with the shape of input 0
/// and copies every element of the input into it.
///
/// @tparam Device  Device tag used to pick the kernel registration; it is not
///                 referenced inside the class body.
/// @tparam T       Element type of the input and output tensors.
template<typename Device, typename T>
class MyCopyOp: public OpKernel {
public:
    explicit MyCopyOp(OpKernelConstruction* context) :
      OpKernel(context) {
    }

    /// Copies input 0 to output 0 element by element and prints both tensors
    /// for debugging. Reports an error through `context` if the output
    /// allocation fails.
    void Compute(OpKernelContext* context) override {
     const Tensor& input = context->input(0);
     auto in_flat = input.flat<T>();

     printf("Debug MyCopyOp Features: %s \n",input.DebugString().c_str());

     Tensor* output = nullptr;
     OP_REQUIRES_OK(context,
       context->allocate_output(0, input.shape(), &output));

     auto out_flat = output->flat<T>();

     // Walk the flat index range [0, NumElements()) so that each element is
     // copied exactly once, whatever the rank of the tensor. Indexing by
     // `dimension * dim_size + i` does NOT enumerate the flat view: it skips
     // most elements and can run past the end of the buffer.
     const long long num_elements = input.NumElements();
     for (long long i = 0; i < num_elements; ++i) {
      out_flat(i) = in_flat(i);
     }

     printf("Debug MyCopyOp Output: %s \n",output->DebugString().c_str());
    }

};


template<typename Device, typename T> 
class MyCopyGradOp: public OpKernel { 
public: 
    explicit MyCopyGradOp(OpKernelConstruction* context) : 
      OpKernel(context) { 

    } 

    void Compute(OpKernelContext* context) override { 
     printf("called MyCopyGradOp.Compute() \n"); 
     const Tensor& gradients = context->input(0); 
     const Tensor& features = context->input(1); 
     printf("Debug MyCopyOpGrad Gradients: %s \n",gradients.DebugString().c_str()); 
     printf("Debug MyCopyOpGrad Features: %s \n",features.DebugString().c_str()); 

     TensorShape output_shape = features.shape(); 

     Tensor* output = nullptr; 
     OP_REQUIRES_OK(context, 
       context->allocate_output(0, output_shape, &output)); 
     output->flat<T>().setZero(); 

     const T* btm_ptr = gradients.flat<T>().data(); 
     T* top_ptr = output->flat<T>().data(); 

     for (int i = 0; i < gradients.NumElements(); ++i) { 
      top_ptr[i] = btm_ptr[i]; 
     } 

     printf("Debug MyCopyOpGrad Output: %s \n",output->DebugString().c_str()); 
     printf("---------------------------------- \n"); 
    } 

}; 


// Op interface for "MyCopy": one input `features` and one output `output`,
// both of element type T, where T is restricted by the `realnumbertype`
// attribute constraint. The .Doc() text is stored with the op definition.
REGISTER_OP("MyCopy") 
.Input("features: T") 
.Output("output: T") 
.Attr("T: realnumbertype") 
.Doc(R"doc(
Copies all input values to the output 
)doc"); 

// Op interface for "MyCopyGrad": inputs `gradients` and `features`, output
// `backprops`, all of element type T (restricted to `realnumbertype`).
// NOTE(review): the .Doc() text is still a placeholder ("TODO!!"); it should
// describe the op once its contract is settled.
REGISTER_OP("MyCopyGrad") 
.Input("gradients: T") 
.Input("features: T") 
.Output("backprops: T") 
.Attr("T: realnumbertype") 
.Doc(R"doc(
TODO!! 
)doc"); 


// Registers the CPU kernels of "MyCopy" and "MyCopyGrad" for one element
// type. The macro body deliberately has no trailing semicolon: the semicolon
// written at each use site terminates the last registration.
#define REGISTER_MYCOPY_KERNELS(type)                                   \
    REGISTER_KERNEL_BUILDER(                                            \
     Name("MyCopy").Device(DEVICE_CPU).TypeConstraint<type>("T"),       \
     MyCopyOp<CPUDevice, type>);                                        \
    REGISTER_KERNEL_BUILDER(                                            \
     Name("MyCopyGrad").Device(DEVICE_CPU).TypeConstraint<type>("T"),   \
     MyCopyGradOp<CPUDevice, type>)

// GPU registrations are currently disabled. They are kept here as an ordinary
// comment (outside the macro) so that no comment line relies on a trailing
// backslash to stay commented out:
//
//   REGISTER_KERNEL_BUILDER(
//    Name("MyCopy").Device(DEVICE_GPU).TypeConstraint<type>("T"),
//    MyCopyOp<GPUDevice, type>);
//   REGISTER_KERNEL_BUILDER(
//    Name("MyCopyGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"),
//    MyCopyGradOp<GPUDevice, type>);


REGISTER_MYCOPY_KERNELS(float);
REGISTER_MYCOPY_KERNELS(int);
REGISTER_MYCOPY_KERNELS(double);


}

ベースとして、シンプルなMNISTの例を使用しています：

layer_test.py

# Trains a softmax regression model on MNIST with the custom "MyCopy" op
# inserted between the softmax output and the loss, to exercise the op and
# its registered gradient.
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
from tensorflow.python.framework import ops

mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

# Load the shared library that provides the MyCopy / MyCopyGrad ops.
copy_op_module = tf.load_op_library('copy_op.so')


@ops.RegisterGradient("MyCopy")
def _CopyOpGrad(op, grad):
    """Gradient of MyCopy: delegates to the MyCopyGrad op.

    Args:
        op: the MyCopy operation being differentiated.
        grad: gradient with respect to the output of `op`.

    Returns:
        The result of `my_copy_grad(grad, op.inputs[0])`.
    """
    return copy_op_module.my_copy_grad(grad, op.inputs[0])


sess = tf.InteractiveSession()

x = tf.placeholder(tf.float32, shape=[None, 784])
y_ = tf.placeholder(tf.float32, shape=[None, 10])

W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))

sess.run(tf.initialize_all_variables())

y1 = tf.nn.softmax(tf.matmul(x, W) + b)
y = copy_op_module.my_copy(y1)  # Here: MyCopy Layer is inserted

cross_entropy = -tf.reduce_sum(y_ * tf.log(y))
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)

for i in range(2):
    batch = mnist.train.next_batch(50)
    train_step.run(feed_dict={x: batch[0], y_: batch[1]})

correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels}))

コンパイル

# Look up the include and library directories of the installed TensorFlow
# package. These must be separate commands: written as prefixes on the g++
# line, the variables would be expanded before they are assigned.
TF_INC=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())')
TF_LIB=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_lib())')
# Build the op library as a shared object; the paths are quoted so that
# directories containing spaces are passed as single arguments.
g++ -std=c++11 -shared copy_op.cc -o copy_op.so -I "$TF_INC" -L "$TF_LIB" -fPIC -Wl,-rpath,"$TF_LIB"

出力：

Debug MyCopyOp Features: Tensor<type: float shape: [50,10] values: 0.1 0.1 0.1...> Debug MyCopyOp Output: Tensor<type: float shape: [50,10] values: 0.1 0.1 0.1...> called MyCopyGradOp.Compute() Debug MyCopyOpGrad Gradients: Tensor<type: float shape: [50,10] values: -0 -0 -0...> Debug MyCopyOpGrad Features: Tensor<type: float shape: [50,10] values: 0.1 0.1 0.1...> Debug MyCopyOpGrad Output: Tensor<type: float shape: [50,10] values: -0 -0 -0...> ---------------------------------- Debug MyCopyOp Features: Tensor<type: float shape: [50,10] values: nan nan nan...> Debug MyCopyOp Output: Tensor<type: float shape: [50,10] values: nan nan nan...> called MyCopyGradOp.Compute() Debug MyCopyOpGrad Gradients: Tensor<type: float shape: [50,10] values: nan nan nan...> Debug MyCopyOpGrad Features: Tensor<type: float shape: [50,10] values: nan nan nan...> Debug MyCopyOpGrad Output: Tensor<type: float shape: [50,10] values: nan nan nan...> ---------------------------------- Debug MyCopyOp Features: Tensor<type: float shape: [10000,10] values: nan nan nan...> Debug MyCopyOp Output: Tensor<type: float shape: [10000,10] values: nan nan nan...> 0.098

どうもありがとうございます！（以下は、コメントでのmrryからの指摘です）

出典

2016-03-24 avo

出力を見るかぎり、`MyCopyOp` と `MyCopyGradOp` は意図したとおりに動作しているようです。コピーレイヤーを使わない場合でも重みが `NaN` になるかどうかを確認できますか？（そのためには、コピーレイヤーを取り除いて学習ステップを1回実行し、2回目の反復で `y1.eval(feed_dict={x: batch[0], y_: batch[1]})` を呼び出してください。） – mrry

参考までに、`-tf.reduce_sum(y_ * tf.log(y))` を使ってクロスエントロピーを計算すると、安定性の問題があることが知られています（代わりに `tf.nn.softmax_cross_entropy_with_logits(y, y_)` を使ってください）。また、`W` 変数をゼロで初期化すると、ランダムに初期化する場合よりも悪い結果になることがよくあります。（詳細は[この回答](http://stackoverflow.com/a/36134261/3574081)を参照してください。） – mrry

助けてくださってありがとうございます！ 1. コピーレイヤーを使わない場合、y1 の評価結果は `[[0.07910535 0.07910535 0.07910535 0.11042032 0.10930145 ...` となり、1ステップ後の結果は `[[nan nan nan ...` になります。 – avo

クロスエントロピーの計算に `-tf.reduce_sum(y_ * tf.log(y))` を使うと、安定性の問題があることが知られています（代わりに `tf.nn.softmax_cross_entropy_with_logits(y, y_)` を使ってください）。また、`W` 変数をゼロで初期化すると、多くの場合、ランダムに初期化するよりも悪い結果につながります。重みの初期化の問題については、この回答（This answer）に詳しい説明があります。

出典

2017-11-18 15:08:56 dga

テンソルフローカスタム勾配

答えて

関連する問題