python – TensorFlow:将list类型特征(multi-hot,多热)输入tf.estimator

某些特征列的数据类型是list,而且各行列表的长度可以不同.我想把这样的列编码为multi-hot(多热)类别特征,并将其输入tf.estimator.我尝试了下面的代码,但出现了错误"Unable to get element as bytes".我认为这是深度学习中的常见做法,尤其是在推荐系统中,例如Wide & Deep(宽度与深度)模型.我找到了一个相关问题(见原文中的here链接),但它没有说明如何把这类特征提供给Estimator.

import pandas as pd
import tensorflow as tf

# Directory handed to the estimator as its model_dir.
OUTDIR = "./data"

# Toy dataset: each row of feature "x" is a Python list of category strings,
# and each row of label "y" is a single string.
data = {"x": [["a", "c"], ["a", "b"], ["b", "c"]], "y": ["x", "y", "z"]}
df = pd.DataFrame(data)

# Separate the label column from the feature columns.
Y = df["y"]
X = df.drop("y", axis=1)

# Intended encoding: an indicator column over the vocabulary a/b/c for
# feature "x".
indicator_features = [
    tf.feature_column.indicator_column(
        categorical_column=tf.feature_column.categorical_column_with_vocabulary_list(
            key="x", vocabulary_list=["a", "b", "c"]
        )
    )
]

# NOTE(review): the labels are strings with three distinct values, while no
# n_classes / label_vocabulary is passed here; presumably LinearClassifier
# needs both for such labels. Confirm against the estimator documentation.
model = tf.estimator.LinearClassifier(
    feature_columns=indicator_features, model_dir=OUTDIR
)

# NOTE(review): X["x"] still holds Python list objects at this point, not
# plain strings. The answer on this page attributes the
# "Unable to get element as bytes" failure to exactly that.
training_input_fn = tf.estimator.inputs.pandas_input_fn(
    x=X, y=Y, batch_size=64, shuffle=True, num_epochs=None
)

# This is the call reported as failing with the InternalError shown in the
# traceback on this page.
model.train(input_fn=training_input_fn)

报错信息如下：

INFO:tensorflow:Using default config. INFO:tensorflow:Using config:
{‘_model_dir’: ‘testalg’, ‘_tf_random_seed’: None,
‘_save_summary_steps’: 100, ‘_save_checkpoints_steps’: None,
‘_save_checkpoints_secs’: 600, ‘_session_config’: None,
‘_keep_checkpoint_max’: 5, ‘_keep_checkpoint_every_n_hours’: 10000,
‘_log_step_count_steps’: 100, ‘_train_distribute’: None, ‘_device_fn’:
None, ‘_service’: None, ‘_cluster_spec’:
, ‘_task_type’: ‘worker’, ‘_task_id’: 0,
‘_global_id_in_cluster’: 0, ‘_master’: ”, ‘_evaluation_master’: ”,
‘_is_chief’: True, ‘_num_ps_replicas’: 0, ‘_num_worker_replicas’: 1}
INFO:tensorflow:Calling model_fn. INFO:tensorflow:Done calling
model_fn. INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized. INFO:tensorflow:Running
local_init_op. INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Error reported to Coordinator: <class 'tensorflow.python.framework.errors_impl.InternalError'>, Unable to
get element as bytes. INFO:tensorflow:Saving checkpoints for 0 into
testalg/model.ckpt.
——————————————————- InternalError Traceback (most recent call last)
/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py
in _do_call(self, fn, *args) 1321 try:
-> 1322 return fn(*args) 1323 except errors.OpError as e:

/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py
in _run_fn(feed_dict, fetch_list, target_list, options, run_metadata)
1306 return self._call_tf_sessionrun(
-> 1307 options, feed_dict, fetch_list, target_list, run_metadata) 1308

/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py
in _call_tf_sessionrun(self, options, feed_dict, fetch_list,
target_list, run_metadata) 1408 self._session, options,
feed_dict, fetch_list, target_list,
-> 1409 run_metadata) 1410 else:

InternalError: Unable to get element as bytes.

During handling of the above exception, another exception occurred:

InternalError Traceback (most recent call last)
in ()
44
45
—> 46 model.train(input_fn=training_input_fn)

/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/estimator/estimator.py
in train(self, input_fn, hooks, steps, max_steps, saving_listeners)
364
365 saving_listeners = _check_listeners_type(saving_listeners)
–> 366 loss = self._train_model(input_fn, hooks, saving_listeners)
367 logging.info(‘Loss for final step: %s.’, loss)
368 return self

/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/estimator/estimator.py
in _train_model(self, input_fn, hooks, saving_listeners) 1117
return self._train_model_distributed(input_fn, hooks,
saving_listeners) 1118 else:
-> 1119 return self._train_model_default(input_fn, hooks, saving_listeners) 1120 1121 def _train_model_default(self,
input_fn, hooks, saving_listeners):

/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/estimator/estimator.py
in _train_model_default(self, input_fn, hooks, saving_listeners)
1133 return self._train_with_estimator_spec(estimator_spec,
worker_hooks, 1134
hooks, global_step_tensor,
-> 1135 saving_listeners) 1136 1137 def _train_model_distributed(self, input_fn, hooks,
saving_listeners):

/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/estimator/estimator.py
in _train_with_estimator_spec(self, estimator_spec, worker_hooks,
hooks, global_step_tensor, saving_listeners) 1334 loss = None
1335 while not mon_sess.should_stop():
-> 1336 _, loss = mon_sess.run([estimator_spec.train_op, estimator_spec.loss]) 1337 return loss 1338

/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py
in __exit__(self, exception_type, exception_value, traceback)
687 if exception_type in [errors.OutOfRangeError, StopIteration]:
688 exception_type = None
–> 689 self._close_internal(exception_type)
690 # exit should return True to suppress an exception.
691 return exception_type is None

/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py
in _close_internal(self, exception_type)
724 if self._sess is None:
725 raise RuntimeError(‘Session is already closed.’)
–> 726 self._sess.close()
727 finally:
728 self._sess = None

/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py
in close(self)
972 if self._sess:
973 try:
–> 974 self._sess.close()
975 except _PREEMPTION_ERRORS:
976 pass

/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py
in close(self) 1116 self._coord.join( 1117
stop_grace_period_secs=self._stop_grace_period_secs,
-> 1118 ignore_live_threads=True) 1119 finally: 1120 try:

/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/coordinator.py
in join(self, threads, stop_grace_period_secs, ignore_live_threads)
387 self._registered_threads = set()
388 if self._exc_info_to_raise:
–> 389 six.reraise(*self._exc_info_to_raise)
390 elif stragglers:
391 if ignore_live_threads:

/home/yinan.li1/anaconda3/lib/python3.6/site-packages/six.py in
reraise(tp, value, tb)
683 value = tp()
684 if value.__traceback__ is not tb:
–> 685 raise value.with_traceback(tb)
686 raise value
687

/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/estimator/inputs/queues/feeding_queue_runner.py
in _run(self, sess, enqueue_op, feed_fn, coord)
92 try:
93 feed_dict = None if feed_fn is None else feed_fn()
—> 94 sess.run(enqueue_op, feed_dict=feed_dict)
95 except (errors.OutOfRangeError, errors.CancelledError):
96 # This exception indicates that a queue was closed.

/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py
in run(self, fetches, feed_dict, options, run_metadata)
898 try:
899 result = self._run(None, fetches, feed_dict, options_ptr,
–> 900 run_metadata_ptr)
901 if run_metadata:
902 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py
in _run(self, handle, fetches, feed_dict, options, run_metadata)
1133 if final_fetches or final_targets or (handle and
feed_dict_tensor): 1134 results = self._do_run(handle,
final_targets, final_fetches,
-> 1135 feed_dict_tensor, options, run_metadata) 1136 else: 1137 results = []

/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py
in _do_run(self, handle, target_list, fetch_list, feed_dict, options,
run_metadata) 1314 if handle is None: 1315 return
self._do_call(_run_fn, feeds, fetches, targets, options,
-> 1316 run_metadata) 1317 else: 1318 return self._do_call(_prun_fn, handle, feeds, fetches)

/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py
in _do_call(self, fn, *args) 1333 except KeyError: 1334
pass
-> 1335 raise type(e)(node_def, op, message) 1336 1337 def _extend_graph(self):

InternalError: Unable to get element as bytes.

解决方法:

我认为你遇到的问题之一在于:pandas中该列的类型实际上是object(每个元素是一个Python列表),而不是字符串.如果把它拆分成若干单独的字符串列,这个错误就会消失.请记住,TensorFlow的基本类型tf.string用于构建字节字符串的张量;当该列中存储的是对象而不是字符串时,就会出现上述错误.

下面的代码可以消除你在上面遇到的错误,但并不能完全解决你的问题.列表长度可变的情况必须通过填充(padding)或类似的方式来处理,因为indicator_column在处理缺失值时可能会出问题.

# Expand the list-valued column "x" into one plain string column per list
# position ("x1", "x2", ...), so that every cell is a string instead of a
# Python list object.
#
# The number of columns is taken from the longest list rather than being
# hard-coded to two, so rows with lists of different lengths are accepted.
# The original row index is kept so the features stay aligned with the
# labels in Y.
expanded = pd.DataFrame(X["x"].values.tolist(), index=X.index)
expanded.columns = ["x{}".format(pos + 1) for pos in range(expanded.shape[1])]

# Shorter lists leave missing cells; pad them with the empty string.
# Per the TensorFlow documentation for categorical_column_with_vocabulary_list,
# "" marks a missing value in a dense string tensor and is dropped by the
# feature column.
X2 = expanded.fillna("")

# One indicator column per list position, all sharing the same vocabulary.
# NOTE: this yields a separate one-hot block for each position, not a single
# multi-hot vector over the vocabulary.
indicator_features = [
    tf.feature_column.indicator_column(
        categorical_column=tf.feature_column.categorical_column_with_vocabulary_list(
            key=column_name, vocabulary_list=["a", "b", "c"]
        )
    )
    for column_name in X2.columns
]

# NOTE: an estimator built from the earlier single-column feature list must
# be re-created with these feature columns before training on this input.
training_input_fn = tf.estimator.inputs.pandas_input_fn(
    x=X2, y=Y, batch_size=64, shuffle=True, num_epochs=None
)

python – tensorflow feed list功能(多热)到tf.estimator

相关推荐