Wie man * richtig * Daten von CSVs in TensorFlow liest
Ich bin auf @ gestoßdies so Entsendung zeigt uns, wie der Code zum Einlesen von CSV-Dateien mithilfe einer Warteschlange eingerichtet wird. Bei jeder Ausführung ist jedoch ein Fehler aufgetreten. Ich habe versucht, es zu debuggen, kann aber nicht herausfinden, was der Fehler bedeutet. Kann mir jemand weiterhelfen?
Der Code, den ich verwende, ist fast wörtlich, was in dem obigen Beitrag geschrieben wurde:
import tensorflow as tf
dataset = '/Users/hdadmin/Data/actions/testing.csv'
def file_len(fname):
with open(fname) as f:
for i, l in enumerate(f):
pass
return i + 1
def read_from_csv(filename_queue):
reader = tf.TextLineReader(skip_header_lines=1)
_, csv_row = reader.read(filename_queue)
record_defaults = [[0],[0],[0],[0],[0]]
colHour,colQuarter,colAction,colUser,colLabel = tf.decode_csv(csv_row, record_defaults=record_defaults)
features = tf.pack([colHour,colQuarter,colAction,colUser])
label = tf.pack([colLabel])
return features, label
def input_pipeline(batch_size, num_epochs=None):
filename_queue = tf.train.string_input_producer([dataset], num_epochs=num_epochs, shuffle=True)
example, label = read_from_csv(filename_queue)
min_after_dequeue = 1000
capacity = min_after_dequeue + 3 * batch_size
example_batch, label_batch = tf.train.shuffle_batch(
[example, label], batch_size=batch_size, capacity=capacity,
min_after_dequeue=min_after_dequeue)
return example_batch, label_batch
file_length = file_len(dataset) - 1
examples, labels = input_pipeline(file_length, 1)
with tf.Session() as sess:
tf.initialize_all_variables().run()
# start populating filename queue
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
try:
while not coord.should_stop():
example_batch, label_batch = sess.run([examples, labels])
print(example_batch)
except tf.errors.OutOfRangeError:
print('Done training, epoch reached')
finally:
coord.request_stop()
coord.join(threads)
Der Fehler, den ich erhalte, ist:
E tensorflow/core/client/tensor_c_api.cc:485] Attempting to use uninitialized value input_producer/limit_epochs/epochs
[[Node: input_producer/limit_epochs/CountUpTo = CountUpTo[T=DT_INT64, _class=["loc:@input_producer/limit_epochs/epochs"], limit=1, _device="/job:localhost/replica:0/task:0/cpu:0"](input_producer/limit_epochs/epochs)]]
E tensorflow/core/client/tensor_c_api.cc:485] RandomShuffleQueue '_2_shuffle_batch/random_shuffle_queue' is closed and has insufficient elements (requested 10000, current size 0)
[[Node: shuffle_batch = QueueDequeueMany[_class=["loc:@shuffle_batch/random_shuffle_queue"], component_types=[DT_INT32, DT_INT32], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/cpu:0"](shuffle_batch/random_shuffle_queue, shuffle_batch/n)]]
Done training, epoch reached
E tensorflow/core/client/tensor_c_api.cc:485] FIFOQueue '_0_input_producer' is closed and has insufficient elements (requested 1, current size 0)
[[Node: ReaderRead = ReaderRead[_class=["loc:@TextLineReader", "loc:@input_producer"], _device="/job:localhost/replica:0/task:0/cpu:0"](TextLineReader, input_producer)]]
E tensorflow/core/client/tensor_c_api.cc:485] Queue '_2_shuffle_batch/random_shuffle_queue' is already closed.
[[Node: shuffle_batch/random_shuffle_queue_Close = QueueClose[_class=["loc:@shuffle_batch/random_shuffle_queue"], cancel_pending_enqueues=false, _device="/job:localhost/replica:0/task:0/cpu:0"](shuffle_batch/random_shuffle_queue)]]
Traceback (most recent call last):
File "csv_test.py", line 49, in <module>
coord.join(threads)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/training/coordinator.py", line 357, in join
six.reraise(*self._exc_info_to_raise)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/training/queue_runner.py", line 185, in _run
sess.run(enqueue_op)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 382, in run
run_metadata_ptr)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 655, in _run
feed_dict_string, options, run_metadata)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 723, in _do_run
target_list, options, run_metadata)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 743, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors.FailedPreconditionError: Attempting to use uninitialized value input_producer/limit_epochs/epochs
[[Node: input_producer/limit_epochs/CountUpTo = CountUpTo[T=DT_INT64, _class=["loc:@input_producer/limit_epochs/epochs"], limit=1, _device="/job:localhost/replica:0/task:0/cpu:0"](input_producer/limit_epochs/epochs)]]
Caused by op u'input_producer/limit_epochs/CountUpTo', defined at:
File "csv_test.py", line 31, in <module>
examples, labels = input_pipeline(file_length, 1)
File "csv_test.py", line 21, in input_pipeline
filename_queue = tf.train.string_input_producer([dataset], num_epochs=num_epochs, shuffle=True)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/training/input.py", line 194, in string_input_producer
summary_name="fraction_of_%d_full" % capacity)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/training/input.py", line 133, in input_producer
input_tensor = limit_epochs(input_tensor, num_epochs)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/training/input.py", line 84, in limit_epochs
counter = epochs.count_up_to(num_epochs)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/variables.py", line 577, in count_up_to
return state_ops.count_up_to(self._variable, limit=limit)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/gen_state_ops.py", line 127, in count_up_to
result = _op_def_lib.apply_op("CountUpTo", ref=ref, limit=limit, name=name)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 703, in apply_op
op_def=op_def)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2310, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1232, in __init__
self._traceback = _extract_stack()
Ich habe Daten aus fünf Spalten zusammengestellt, die mit dem Beispiel übereinstimmen. Es ist etwas in der Art von:
"v1","v2","v3","v4","v5"
1,1,1,3,10
4,2,1,10,8
1,4,1,9,3
3,3,1,1,5
3,4,1,4,3
3,2,1,5,8
1,1,1,9,7
4,1,1,4,9
2,3,1,8,4
Vielen Dank vor der Zeit.