
python - tensorflow - Type error on input to the StringToHashBucketFast op

I am trying to train a LinearRegressor in TensorFlow. I have worked through the tutorial on the site and am now trying to apply it to my own dataset.

After many alterations, particularly around which data types are being passed in, I keep getting similar errors:

import pandas as pd 
import tempfile 
COLUMNS = ['imp_time', 'width', 'height', 
      'geo_region', 'venue_id', 'seller_member_id', 
      'site_domain', 'tag_id', 'geo_city', 'fold_position', 'event_type'] 

train_file = 'imp-train.csv' 
test_file = 'imp-test.csv' 

df_train = pd.read_table(train_file, names=COLUMNS, skipinitialspace=True, skiprows=1).dropna() 
df_test = pd.read_table(test_file, names=COLUMNS, skipinitialspace=True, skiprows=1).dropna() 

LABEL_COLUMN = "label" 
df_train[LABEL_COLUMN] = (df_train["event_type"].apply(lambda x: 1 if x == "click" else 0)).astype(int) 
df_test[LABEL_COLUMN] = (df_test["event_type"].apply(lambda x: 1 if x == "click" else 0)).astype(int) 

CATEGORICAL_COLUMNS = ["width", "height","geo_region", "venue_id", 
         "seller_member_id", "site_domain", "tag_id", "geo_city", "fold_position"] 
CONTINUOUS_COLUMNS = [] 
import tensorflow as tf 

def input_fn(df): 
    # creates a dict mapping from each continuous feature column name (k) to
    # the values of that column stored in a constant Tensor
    continuous_cols = {k: tf.constant(df[k].values)
        for k in CONTINUOUS_COLUMNS}
    # creates a dict mapping from each categorical feature column name (k) to
    # the values of that column stored in a tf.SparseTensor 
    categorical_cols = {k: tf.SparseTensor(
      indices=[[i, 0] for i in range(df[k].size)], 
      values=df[k].values, 
      shape=[df[k].size, 1]) 
          for k in CATEGORICAL_COLUMNS} 
    # merge the two dicts into one 
    feature_cols = dict(continuous_cols.items() + categorical_cols.items())
    # convert the label col into a constant Tensor 
    label = tf.constant(df[LABEL_COLUMN].values) 
    # return the feature cols and label 
    return feature_cols, label 

def train_input_fn(): 
    return input_fn(df_train) 

def eval_input_fn(): 
    return input_fn(df_test) 

# base categorical feature cols 
width = tf.contrib.layers.sparse_column_with_hash_bucket("width", hash_bucket_size=100) 
height = tf.contrib.layers.sparse_column_with_hash_bucket("height", hash_bucket_size=100) 
geo_region = tf.contrib.layers.sparse_column_with_hash_bucket("geo_region", hash_bucket_size=10000) 
venue_id = tf.contrib.layers.sparse_column_with_hash_bucket("venue_id", hash_bucket_size=10000) 
seller_member_id = tf.contrib.layers.sparse_column_with_hash_bucket("seller_member_id", hash_bucket_size=10000) 
site_domain = tf.contrib.layers.sparse_column_with_hash_bucket("site_domain", hash_bucket_size=10000) 
tag_id = tf.contrib.layers.sparse_column_with_hash_bucket("tag_id", hash_bucket_size=100000) 
fold_position = tf.contrib.layers.sparse_column_with_hash_bucket("fold_position", hash_bucket_size=10) 

# intersecting multiple cols with CrossedColumn 
width_x_height = tf.contrib.layers.crossed_column([width, height], hash_bucket_size=10000) 

# building the model 
model_dir = tempfile.mkdtemp() 

m = tf.contrib.learn.LinearRegressor(feature_columns=[ 
    geo_region, venue_id, seller_member_id, site_domain, tag_id, fold_position, width_x_height], 
    optimizer=tf.train.FtrlOptimizer(
    learning_rate=0.1, 
    l1_regularization_strength=1.0, 
    l2_regularization_strength=1.0), 
    model_dir=model_dir) 

# train model 
m.fit(input_fn=train_input_fn, steps=200) 

My error trace is as follows:

TypeError         Traceback (most recent call last) 
<ipython-input-83-4f4e07dac1eb> in <module>() 
    11 
    12 # train model 
---> 13 m.fit(input_fn=train_input_fn, steps=200) 

/Users/dennisy/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.pyc in fit(self, x, y, input_fn, steps, batch_size, monitors, max_steps) 
    238        steps=steps, 
    239        monitors=monitors, 
--> 240        max_steps=max_steps) 
    241  logging.info('Loss for final step: %s.', loss) 
    242  return self 

/Users/dennisy/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.pyc in _train_model(self, input_fn, steps, feed_fn, init_op, init_feed_fn, init_fn, device_fn, monitors, log_every_steps, fail_on_nan_loss, max_steps) 
    548  features, targets = input_fn() 
    549  self._check_inputs(features, targets) 
--> 550  train_op, loss_op = self._get_train_ops(features, targets) 
    551 
    552  # Add default monitors. 

/Users/dennisy/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/estimators/linear.pyc in _get_train_ops(self, features, targets) 
    334  raise ValueError("SDCAOptimizer does not currently support regression.") 
    335  self._validate_linear_feature_columns(features) 
--> 336  return super(LinearRegressor, self)._get_train_ops(features, targets) 
    337 
    338 def _get_eval_ops(self, features, targets, metrics=None): 

/Users/dennisy/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined.pyc in _get_train_ops(self, features, targets) 
    180 
    181  features = self._get_feature_dict(features) 
--> 182  logits = self._logits(features, is_training=True) 
    183  if self._enable_centered_bias: 
    184  centered_bias_step = [self._centered_bias_step(targets, features)] 

/Users/dennisy/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined.pyc in _logits(self, features, is_training) 
    269  logits = self._dnn_logits(features, is_training) 
    270  else: 
--> 271  logits = self._linear_logits(features, is_training) 
    272 
    273  if self._enable_centered_bias: 

/Users/dennisy/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined.pyc in _linear_logits(self, features, is_training) 
    231 def _linear_logits(self, features, is_training): 
    232  return self._linear_model.build_model(
--> 233   features, self._linear_feature_columns, is_training) 
    234 
    235 def _centered_bias(self): 

/Users/dennisy/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/estimators/composable_model.pyc in build_model(self, features, feature_columns, is_training) 
    175   num_outputs=self._num_label_columns, 
    176   weight_collections=[self._weight_collection_name], 
--> 177   scope=scope) 
    178  return logits 
    179 

/Users/dennisy/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/layers/python/layers/feature_column_ops.pyc in weighted_sum_from_feature_columns(columns_to_tensors, feature_columns, num_outputs, weight_collections, trainable, scope) 
    176  for column in sorted(set(feature_columns), key=lambda x: x.key): 
    177  try: 
--> 178   transformed_tensor = transformer.transform(column) 
    179   predictions, variable = column.to_weighted_sum(transformed_tensor, 
    180              num_outputs, 

/Users/dennisy/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/layers/python/layers/feature_column_ops.pyc in transform(self, feature_column) 
    382  return self._columns_to_tensors[feature_column] 
    383 
--> 384  feature_column.insert_transformed_feature(self._columns_to_tensors) 
    385 
    386  if feature_column not in self._columns_to_tensors: 

/Users/dennisy/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/layers/python/layers/feature_column.pyc in insert_transformed_feature(self, columns_to_tensors) 
    362   columns_to_tensors[self.name].values, 
    363   self.bucket_size, 
--> 364   name=self.name + "_lookup") 
    365  columns_to_tensors[self] = ops.SparseTensor(
    366   columns_to_tensors[self.name].indices, sparse_id_values, 

/Users/dennisy/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/gen_string_ops.pyc in string_to_hash_bucket_fast(input, num_buckets, name) 
    183 """ 
    184 result = _op_def_lib.apply_op("StringToHashBucketFast", input=input, 
--> 185         num_buckets=num_buckets, name=name) 
    186 return result 
    187 

/Users/dennisy/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.pyc in apply_op(self, op_type_name, name, **keywords) 
    461    if input_arg.type != types_pb2.DT_INVALID: 
    462    raise TypeError("%s expected type of %s." % 
--> 463        (prefix, dtypes.as_dtype(input_arg.type).name)) 
    464    else: 
    465    raise TypeError(

TypeError: Input 'input' of 'StringToHashBucketFast' Op has type int64 that does not match expected type of string. 

I am not quite sure what input I am passing to the StringToHashBucketFast operation. I have tried all the separate pieces and they run when called individually; only fit fails.

Looking forward to a smart guy helping out!

Answer


StringToHashBucketFast does not work with int64; it works on strings. What this error message means is that you declared one or more of your feature columns as strings (implicitly, by using tf.contrib.layers.sparse_column_with_hash_bucket), but the feature values you are feeding it are int64.
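
In your code the likely culprits are width and height: pandas will parse those CSV columns as int64, yet they are declared with sparse_column_with_hash_bucket, which hashes strings. As a minimal sketch of one fix (assuming the integer-typed columns really should be treated as categorical), cast every categorical value to a string inside input_fn so the SparseTensor carries string values:

    # inside input_fn: cast to str so StringToHashBucketFast
    # receives string input instead of int64
    categorical_cols = {k: tf.SparseTensor(
      indices=[[i, 0] for i in range(df[k].size)],
      values=df[k].astype(str).values,
      shape=[df[k].size, 1])
          for k in CATEGORICAL_COLUMNS}

Alternatively, columns that are genuinely integer-valued can be declared with tf.contrib.layers.sparse_column_with_integerized_feature, which accepts int64 values directly. A quick look at df_train.dtypes will show which columns pandas parsed as int64.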
