Tensorflow remodelando un tensor

Estoy tratando de usartf.nn.sparse_softmax_cross_entropy_with_logits y he seguido la respuesta del usuario Olivier Moindrot [aquí] [1] pero recibo un error de dimensión

Estoy construyendo una red de segmentación, por lo que la imagen de entrada es 200x200 y la imagen de salida es 200x200. La clasificación es binaria, por lo tanto, en primer plano y en segundo plano.

Después de construir la CNNpred = conv_net(x, weights, biases, keep_prob)

pred Se ve como esto<tf.Tensor 'Add_1:0' shape=(?, 40000) dtype=float32>

La CNN tiene un par de capas conv seguidas de una capa completamente conectada. La capa completamente conectada es 40000 porque tiene una superficie plana de 200x200.

De acuerdo con el enlace anterior, cambio la formapred al igual que...

(nota al margen: también intenté empacartf.pack() dospred's, como arriba, juntos, pero pensé que eso estaba mal)

pred = tf.reshape(pred, [-1, 200, 200, 2])

... para que haya 2 clasificaciones. Continuando con el enlace de arriba ...

temp_pred = tf.reshape(pred, [-1,2])
temp_y = tf.reshape(y, [-1])
cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(temp_pred, temp_y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

Tengo los siguientes marcadores de posición y datos de lote ...

x = tf.placeholder(tf.float32, [None, 200, 200])
y = tf.placeholder(tf.int64, [None, 200, 200])
(Pdb) batch_x.shape
(10, 200, 200)
(Pdb) batch_y.shape
(10, 200, 200)

Cuando ejecuto una sesión de entrenamiento, aparece el siguiente error de dimensión:

tensorflow.python.framework.errors.InvalidArgumentError: logits first
dimension must match labels size.  logits shape=[3200000,2] labels 
shape=[400000]

Mi código completo se ve así:

import tensorflow as tf
import pdb
import numpy as np

# Import MINST data
# from tensorflow.examples.tutorials.mnist import input_data
# mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)


# Parameters
learning_rate = 0.001
training_iters = 200000
batch_size = 10
display_step = 1

# Network Parameters
n_input = 200 # MNIST data input (img shape: 28*28)
n_classes = 2 # MNIST total classes (0-9 digits)
n_output = 40000
#n_input = 200

dropout = 0.75 # Dropout, probability to keep units

# tf Graph input
x = tf.placeholder(tf.float32, [None, n_input, n_input])
y = tf.placeholder(tf.int64, [None, n_input, n_input])
keep_prob = tf.placeholder(tf.float32) #dropout (keep probability)


# Create some wrappers for simplicity
def conv2d(x, W, b, strides=1):
    # Conv2D wrapper, with bias and relu activation
    x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
    x = tf.nn.bias_add(x, b)
    return tf.nn.relu(x)

def maxpool2d(x, k=2):
    # MaxPool2D wrapper
    return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1],
                          padding='SAME')


# Create model
def conv_net(x, weights, biases, dropout):
    # Reshape input picture
    x = tf.reshape(x, shape=[-1, 200, 200, 1])

    # Convolution Layer
    conv1 = conv2d(x, weights['wc1'], biases['bc1'])
    # Max Pooling (down-sampling)
    # conv1 = tf.nn.local_response_normalization(conv1)
    # conv1 = maxpool2d(conv1, k=2)

    # Convolution Layer
    conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])
    # Max Pooling (down-sampling)
    # conv2 = tf.nn.local_response_normalization(conv2)
    # conv2 = maxpool2d(conv2, k=2)

    # Convolution Layer
    conv3 = conv2d(conv2, weights['wc3'], biases['bc3'])
    # # Max Pooling (down-sampling)
    # conv3 = tf.nn.local_response_normalization(conv3)
    # conv3 = maxpool2d(conv3, k=2)

    # return conv3

    # Fully connected layer
    # Reshape conv2 output to fit fully connected layer input
    fc1 = tf.reshape(conv2, [-1, weights['wd1'].get_shape().as_list()[0]])
    fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
    fc1 = tf.nn.relu(fc1)
    # Apply Dropout
    fc1 = tf.nn.dropout(fc1, dropout)

    return tf.add(tf.matmul(fc1, weights['out']), biases['out'])

    # Output, class prediction
    # output = []
    # for i in xrange(2):
    #     # output.append(tf.nn.softmax(tf.add(tf.matmul(fc1, weights['out']), biases['out'])))
    #     output.append((tf.add(tf.matmul(fc1, weights['out']), biases['out'])))
    #
    # return output

# Store layers weight & bias
weights = {
    # 5x5 conv, 1 input, 32 outputs
    'wc1': tf.Variable(tf.random_normal([5, 5, 1, 32])),
    # 5x5 conv, 32 inputs, 64 outputs
    'wc2': tf.Variable(tf.random_normal([5, 5, 32, 64])),
    # 5x5 conv, 32 inputs, 64 outputs
    'wc3': tf.Variable(tf.random_normal([5, 5, 64, 128])),
    # fully connected, 7*7*64 inputs, 1024 outputs
    'wd1': tf.Variable(tf.random_normal([50*50*64, 1024])),
    # 1024 inputs, 10 outputs (class prediction)
    'out': tf.Variable(tf.random_normal([1024, n_output]))
}

biases = {
    'bc1': tf.Variable(tf.random_normal([32])),
    'bc2': tf.Variable(tf.random_normal([64])),
    'bc3': tf.Variable(tf.random_normal([128])),
    'bd1': tf.Variable(tf.random_normal([1024])),
    'out': tf.Variable(tf.random_normal([n_output]))
}

# Construct model
pred = conv_net(x, weights, biases, keep_prob)
pdb.set_trace()
# pred = tf.pack(tf.transpose(pred,[1,2,0]))
pred = tf.reshape(pred, [-1, n_input, n_input, 2])
temp_pred = tf.reshape(pred, [-1,2])
temp_y = tf.reshape(y, [-1])
# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(temp_pred, temp_y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Evaluate model
# correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
temp_pred2 = tf.reshape(pred, [-1,n_input,n_input])
correct_pred = tf.equal(tf.cast(y,tf.float32),tf.sub(temp_pred2,tf.cast(y,tf.float32)))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initializing the variables
init = tf.initialize_all_variables()

# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    summ = tf.train.SummaryWriter('/tmp/logdir/', sess.graph_def)
    step = 1
    from tensorflow.contrib.learn.python.learn.datasets.scroll import scroll_data
    data = scroll_data.read_data('/home/kendall/Desktop/')
    # Keep training until reach max iterations
    while step * batch_size < training_iters:
        batch_x, batch_y = data.train.next_batch(batch_size)
        # Run optimization op (backprop)
        batch_x = batch_x.reshape((batch_size, n_input, n_input))
        batch_y = batch_y.reshape((batch_size, n_input, n_input))
        batch_y = np.int64(batch_y)
        # y = tf.reshape(y, [-1,n_input,n_input])
        pdb.set_trace()
        sess.run(optimizer, feed_dict={x: batch_x, y: batch_y, keep_prob: dropout})
        if step % display_step == 0:
            # Calculate batch loss and accuracy
            pdb.set_trace()
            loss, acc = sess.run([cost, accuracy], feed_dict={x: batch_x, y: batch_y, keep_prob: 1.})
            print "Iter " + str(step*batch_size) + ", Minibatch Loss= " + \
                  "{:.6f}".format(loss) + ", Training Accuracy= " + \
                  "{:.5f}".format(acc)
        step += 1
    print "Optimization Finished!"

    # Calculate accuracy for 256 mnist test images
    print "Testing Accuracy:", \
        sess.run(accuracy, feed_dict={x: data.test.images[:256],
                                      y: data.test.labels[:256],
                                      keep_prob: 1.})



  [1]: http://stackoverflow.com/questions/35317029/how-to-implement-pixel-wise-classification-for-scene-labeling-in-tensorflow/37294185?noredirect=1#comment63253577_37294185

Respuestas a la pregunta(2)

Su respuesta a la pregunta