教程 | 如何在TensorFlow中高效使用数据集

2018-02-12 10:48:38来源:http://tech.ifeng.com/a/20180211/44878410_0.shtml作者:凤凰科技人点击

# create a random vector of shape (100,2)
x = np.random.sample((100,2))
# make a dataset from a numpy array
dataset = tf.data.Dataset.from_tensor_slices(x)

features, labels = (np.random.sample((100,2)), np.random.sample((100,1)))
dataset = tf.data.Dataset.from_tensor_slices((features,labels))

# using a tensor
dataset = tf.data.Dataset.from_tensor_slices(tf.random_uniform([100, 2]))

x = tf.placeholder(tf.float32, shape=[None,2])
dataset = tf.data.Dataset.from_tensor_slices(x)

sequence = np.array([[1],[2,3],[3,4]])
def generator():
for el in sequence:
yield el
dataset = tf.data.Dataset().from_generator(generator,
output_types=tf.float32,
output_shapes=[tf.float32])

One Shot 迭代器

x = np.random.sample((100,2))
# make a dataset from a numpy array
dataset = tf.data.Dataset.from_tensor_slices(x)
# create the iterator
iter = dataset.make_one_shot_iterator()

...
# create the iterator
iter = dataset.make_one_shot_iterator()
el = iter.get_next()

with tf.Session() as sess:
print(sess.run(el)) # output: [ 0.42116176 0.40666069]

# using a placeholder
x = tf.placeholder(tf.float32, shape=[None,2])
dataset = tf.data.Dataset.from_tensor_slices(x)
data = np.random.sample((100,2))
iter = dataset.make_initializable_iterator() # create the iterator
el = iter.get_next()
with tf.Session() as sess:
# feed the placeholder with data
sess.run(iter.initializer, feed_dict={ x: data })
print(sess.run(el)) # output [ 0.52374458 0.71968478]

train_data = (np.random.sample((100,2)), np.random.sample((100,1)))
test_data = (np.array([[1,2]]), np.array([[0]]))

# initializable iterator to switch between dataset
EPOCHS = 10
x, y = tf.placeholder(tf.float32, shape=[None,2]), tf.placeholder(tf.float32, shape=[None,1])
dataset = tf.data.Dataset.from_tensor_slices((x, y))
train_data = (np.random.sample((100,2)), np.random.sample((100,1)))
test_data = (np.array([[1,2]]), np.array([[0]]))
iter = dataset.make_initializable_iterator()
features, labels = iter.get_next()
with tf.Session() as sess:
# initialise iterator with train data
sess.run(iter.initializer, feed_dict={ x: train_data[0], y: train_data[1]})
for _ in range(EPOCHS):
sess.run([features, labels])
# switch to test data
sess.run(iter.initializer, feed_dict={ x: test_data[0], y: test_data[1]})
print(sess.run([features, labels]))

# making fake data using numpy
train_data = (np.random.sample((100,2)), np.random.sample((100,1)))
test_data = (np.random.sample((10,2)), np.random.sample((10,1)))

# create two datasets, one for training and one for test
train_dataset = tf.data.Dataset.from_tensor_slices(train_data)
test_dataset = tf.data.Dataset.from_tensor_slices(test_data)

# create a iterator of the correct shape and type
iter = tf.data.Iterator.from_structure(train_dataset.output_types,
train_dataset.output_shapes)

# create the initialisation operations
train_init_op = iter.make_initializer(train_dataset)
test_init_op = iter.make_initializer(test_dataset)

features, labels = iter.get_next()

# Reinitializable iterator to switch between Datasets
EPOCHS = 10
# making fake data using numpy
train_data = (np.random.sample((100,2)), np.random.sample((100,1)))
test_data = (np.random.sample((10,2)), np.random.sample((10,1)))
# create two datasets, one for training and one for test
train_dataset = tf.data.Dataset.from_tensor_slices(train_data)
test_dataset = tf.data.Dataset.from_tensor_slices(test_data)
# create a iterator of the correct shape and type
iter = tf.data.Iterator.from_structure(train_dataset.output_types,
train_dataset.output_shapes)
features, labels = iter.get_next()
# create the initialisation operations
train_init_op = iter.make_initializer(train_dataset)
test_init_op = iter.make_initializer(test_dataset)
with tf.Session() as sess:
sess.run(train_init_op) # switch to train dataset
for _ in range(EPOCHS):
sess.run([features, labels])
sess.run(test_init_op) # switch to val dataset
print(sess.run([features, labels]))

...
next_el = iter.get_next()
...
print(sess.run(next_el)) # will output the current element

# using two numpy arrays
features, labels = (np.array([np.random.sample((100,2))]),
np.array([np.random.sample((100,1))]))
dataset = tf.data.Dataset.from_tensor_slices((features,labels)).repeat().batch(BATCH_SIZE)

iter = dataset.make_one_shot_iterator()
x, y = iter.get_next()

# make a simple model
net = tf.layers.dense(x, 8) # pass the first value from iter.get_next() as input
net = tf.layers.dense(net, 8)
prediction = tf.layers.dense(net, 1)
loss = tf.losses.mean_squared_error(prediction, y) # pass the second value from iter.get_net() as label

# make a simple model
EPOCHS = 10
BATCH_SIZE = 16
# using two numpy arrays
features, labels = (np.array([np.random.sample((100,2))]),
np.array([np.random.sample((100,1))]))
dataset = tf.data.Dataset.from_tensor_slices((features,labels)).repeat().batch(BATCH_SIZE)
iter = dataset.make_one_shot_iterator()
x, y = iter.get_next()
# make a simple model
net = tf.layers.dense(x, 8, activation=tf.tanh) # pass the first value from iter.get_next() as input
net = tf.layers.dense(net, 8, activation=tf.tanh)
prediction = tf.layers.dense(net, 1, activation=tf.tanh)
loss = tf.losses.mean_squared_error(prediction, y) # pass the second value from iter.get_net() as label
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for i in range(EPOCHS):
_, loss_value = sess.run([train_op, loss])
print("Iter: {}, Loss: {:.4f}".format(i, loss_value))

Iter: 0, Loss: 0.1328
Iter: 1, Loss: 0.1312
Iter: 2, Loss: 0.1296
Iter: 3, Loss: 0.1281
Iter: 4, Loss: 0.1267
Iter: 5, Loss: 0.1254
Iter: 6, Loss: 0.1242
Iter: 7, Loss: 0.1231
Iter: 8, Loss: 0.1220
Iter: 9, Loss: 0.1210

# BATCHING
BATCH_SIZE = 4
x = np.random.sample((100,2))
# make a dataset from a numpy array
dataset = tf.data.Dataset.from_tensor_slices(x).batch(BATCH_SIZE)
iter = dataset.make_one_shot_iterator()
el = iter.get_next()
with tf.Session() as sess:
print(sess.run(el))

[[ 0.656861280.99373963]
[ 0.696904510.32446826]
[ 0.571484220.68688242]
[ 0.203351160.82473219]]
repeat

shuffle

# BATCHING
BATCH_SIZE = 4
x = np.array([[1],[2],[3],[4]])
# make a dataset from a numpy array
dataset = tf.data.Dataset.from_tensor_slices(x)
dataset = dataset.shuffle(buffer_size=100)
dataset = dataset.batch(BATCH_SIZE)
iter = dataset.make_one_shot_iterator()
el = iter.get_next()
with tf.Session() as sess:
print(sess.run(el))

[[4]
[2]
[3]
[1]]

[[3]
[1]
[2]
[4]]

MAP

# MAP
x = np.array([[1],[2],[3],[4]])
# make a dataset from a numpy array
dataset = tf.data.Dataset.from_tensor_slices(x)
dataset = dataset.map(lambda x: x*2)
iter = dataset.make_one_shot_iterator()
el = iter.get_next()
with tf.Session() as sess:
# this will run forever
for _ in range(len(x)):
print(sess.run(el))

[2]
[4]
[6]
[8]

TensorFlow 数据集教程：https://www.tensorflow.org/programmers_guide/datasets