How to *correctly* read data from CSVs in TensorFlow

I came across this post, which shows how to set up code for reading CSV files using a queue. Every time I run it, though, I get an error. I've tried to debug it, but I can't figure out what the error means. Can anyone help me out?

The code I'm using is almost verbatim what was written in the post above:

import tensorflow as tf

dataset = '/Users/hdadmin/Data/actions/testing.csv'

def file_len(fname):
    with open(fname) as f:
        for i, l in enumerate(f):
            pass
    return i + 1

def read_from_csv(filename_queue):
  reader = tf.TextLineReader(skip_header_lines=1)
  _, csv_row = reader.read(filename_queue)
  record_defaults = [[0],[0],[0],[0],[0]]
  colHour,colQuarter,colAction,colUser,colLabel = tf.decode_csv(csv_row, record_defaults=record_defaults)
  features = tf.pack([colHour,colQuarter,colAction,colUser])  
  label = tf.pack([colLabel])  
  return features, label

def input_pipeline(batch_size, num_epochs=None):
  filename_queue = tf.train.string_input_producer([dataset], num_epochs=num_epochs, shuffle=True)  
  example, label = read_from_csv(filename_queue)
  min_after_dequeue = 1000
  capacity = min_after_dequeue + 3 * batch_size
  example_batch, label_batch = tf.train.shuffle_batch(
      [example, label], batch_size=batch_size, capacity=capacity,
      min_after_dequeue=min_after_dequeue)
  return example_batch, label_batch

file_length = file_len(dataset) - 1
examples, labels = input_pipeline(file_length, 1)

with tf.Session() as sess:
  tf.initialize_all_variables().run()

  # start populating filename queue
  coord = tf.train.Coordinator()
  threads = tf.train.start_queue_runners(coord=coord)

  try:
    while not coord.should_stop():
      example_batch, label_batch = sess.run([examples, labels])
      print(example_batch)
  except tf.errors.OutOfRangeError:
    print('Done training, epoch reached')
  finally:
    coord.request_stop()

  coord.join(threads) 

The error I get is:

E tensorflow/core/client/tensor_c_api.cc:485] Attempting to use uninitialized value input_producer/limit_epochs/epochs
     [[Node: input_producer/limit_epochs/CountUpTo = CountUpTo[T=DT_INT64, _class=["loc:@input_producer/limit_epochs/epochs"], limit=1, _device="/job:localhost/replica:0/task:0/cpu:0"](input_producer/limit_epochs/epochs)]]
E tensorflow/core/client/tensor_c_api.cc:485] RandomShuffleQueue '_2_shuffle_batch/random_shuffle_queue' is closed and has insufficient elements (requested 10000, current size 0)
     [[Node: shuffle_batch = QueueDequeueMany[_class=["loc:@shuffle_batch/random_shuffle_queue"], component_types=[DT_INT32, DT_INT32], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/cpu:0"](shuffle_batch/random_shuffle_queue, shuffle_batch/n)]]
Done training, epoch reached
E tensorflow/core/client/tensor_c_api.cc:485] FIFOQueue '_0_input_producer' is closed and has insufficient elements (requested 1, current size 0)
     [[Node: ReaderRead = ReaderRead[_class=["loc:@TextLineReader", "loc:@input_producer"], _device="/job:localhost/replica:0/task:0/cpu:0"](TextLineReader, input_producer)]]
E tensorflow/core/client/tensor_c_api.cc:485] Queue '_2_shuffle_batch/random_shuffle_queue' is already closed.
     [[Node: shuffle_batch/random_shuffle_queue_Close = QueueClose[_class=["loc:@shuffle_batch/random_shuffle_queue"], cancel_pending_enqueues=false, _device="/job:localhost/replica:0/task:0/cpu:0"](shuffle_batch/random_shuffle_queue)]]
Traceback (most recent call last):
  File "csv_test.py", line 49, in <module>
    coord.join(threads) 
  File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/training/coordinator.py", line 357, in join
    six.reraise(*self._exc_info_to_raise)
  File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/training/queue_runner.py", line 185, in _run
    sess.run(enqueue_op)
  File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 382, in run
    run_metadata_ptr)
  File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 655, in _run
    feed_dict_string, options, run_metadata)
  File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 723, in _do_run
    target_list, options, run_metadata)
  File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 743, in _do_call
    raise type(e)(node_def, op, message)
tensorflow.python.framework.errors.FailedPreconditionError: Attempting to use uninitialized value input_producer/limit_epochs/epochs
     [[Node: input_producer/limit_epochs/CountUpTo = CountUpTo[T=DT_INT64, _class=["loc:@input_producer/limit_epochs/epochs"], limit=1, _device="/job:localhost/replica:0/task:0/cpu:0"](input_producer/limit_epochs/epochs)]]
Caused by op u'input_producer/limit_epochs/CountUpTo', defined at:
  File "csv_test.py", line 31, in <module>
    examples, labels = input_pipeline(file_length, 1)
  File "csv_test.py", line 21, in input_pipeline
    filename_queue = tf.train.string_input_producer([dataset], num_epochs=num_epochs, shuffle=True)
  File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/training/input.py", line 194, in string_input_producer
    summary_name="fraction_of_%d_full" % capacity)
  File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/training/input.py", line 133, in input_producer
    input_tensor = limit_epochs(input_tensor, num_epochs)
  File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/training/input.py", line 84, in limit_epochs
    counter = epochs.count_up_to(num_epochs)
  File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/variables.py", line 577, in count_up_to
    return state_ops.count_up_to(self._variable, limit=limit)
  File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/gen_state_ops.py", line 127, in count_up_to
    result = _op_def_lib.apply_op("CountUpTo", ref=ref, limit=limit, name=name)
  File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 703, in apply_op
    op_def=op_def)
  File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2310, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1232, in __init__
    self._traceback = _extract_stack()
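
Looking at the first line of the error ("Attempting to use uninitialized value input_producer/limit_epochs/epochs"), I wonder whether the epochs counter is the culprit: as far as I can tell, passing num_epochs to tf.train.string_input_producer creates a *local* variable, and tf.initialize_all_variables() only covers global variables. A minimal sketch of what I mean (not verified, and I may be reading the docs wrong):

with tf.Session() as sess:
  # The epochs counter created by num_epochs lives in the local
  # variables collection, which initialize_all_variables() does not
  # touch, so it would presumably need its own initializer:
  sess.run(tf.initialize_all_variables())
  sess.run(tf.initialize_local_variables())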

I put together five columns of data to match the example. It looks something like this:

"v1","v2","v3","v4","v5"
1,1,1,3,10
4,2,1,10,8
1,4,1,9,3
3,3,1,1,5
3,4,1,4,3
3,2,1,5,8
1,1,1,9,7
4,1,1,4,9
2,3,1,8,4
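
In case the data itself is the problem rather than the pipeline, here is a small standalone check that decodes one row with the same record_defaults, bypassing the queue machinery entirely (a sketch using the first data row above):

import tensorflow as tf

# Decode a single row with the same five integer defaults
# used in read_from_csv, no reader or queue involved.
row = tf.constant("1,1,1,3,10")
record_defaults = [[0], [0], [0], [0], [0]]
colHour, colQuarter, colAction, colUser, colLabel = tf.decode_csv(
    row, record_defaults=record_defaults)
features = tf.pack([colHour, colQuarter, colAction, colUser])
label = tf.pack([colLabel])

with tf.Session() as sess:
  # should print something like [array([1, 1, 1, 3]), array([10])]
  print(sess.run([features, label]))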

Thanks in advance.
