import tensorflow as tf
import numpy as np
#############
# Section 1: built-in dataset ################
# Load the mnist.npz file from the data folder in the parent directory.
mnist_data = np.load('../data/mnist.npz')
x = mnist_data['x_train']
y = mnist_data['y_train']
x_test = mnist_data['x_test']
y_test = mnist_data['y_test']
# Print the shapes of x, y, x_test, y_test.
# (Set a breakpoint at a suitable place and upload a screenshot of the output.)
# Fix: the last label used to read 'y_shape:' but the value printed is y_test's shape.
print('x:', x.shape, 'y:', y.shape, 'x_test:', x_test.shape, 'y_test:', y_test.shape)
# Wrap the loaded arrays in Dataset objects (one element per sample).
train = tf.data.Dataset.from_tensor_slices((x, y))
test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
#############
# Section 2: loading a CSV file ################
import pandas as pd
# Read titanic_file.csv from the data folder in the parent directory.
titanic_file = pd.read_csv('../data/titanic_file.csv')
# One dataset element per row; each element is a dict of column -> scalar tensor.
titanic_slices = tf.data.Dataset.from_tensor_slices(dict(titanic_file))
for row in titanic_slices.take(1):  # take(1) yields only the first element
    for key, value in row.items():  # walk the (column name, value) pairs
        print('{!r:20s}: {}'.format(key, value))  # show each column and its value
# make_csv_dataset batches the rows and splits off the label column.
titanic_batches = tf.data.experimental.make_csv_dataset(
    '../data/titanic_file.csv',
    batch_size=4,
    label_name='survived')
for features, labels in titanic_batches.take(1):
    print('survived: {}'.format(labels))
    print('features:')
    for key, value in features.items():
        print('{!r:20s}: {}'.format(key, value))
#############
# Section 3: loading a TFRecord file ################
# Load fsns.tfrec from the data folder in the parent directory.
dataset = tf.data.TFRecordDataset(filenames=['../data/fsns.tfrec'])
print(dataset)
# Pull the first serialized record out of the dataset ...
record_iterator = iter(dataset)
raw_example = next(record_iterator)
# ... and parse its bytes into a tf.train.Example protobuf.
parsed = tf.train.Example.FromString(raw_example.numpy())
print(parsed.features.feature['image/text'])  # sanity-check one feature
#############
# Section 4: loading a text file ################
# Load cowper.txt from the data folder in the parent directory.
cowper = tf.data.TextLineDataset('../data/cowper.txt')
# Show the first five lines as raw bytes.
for raw_line in cowper.take(5):
    print(raw_line.numpy())
#############
# Section 5: loading a collection of files ################
import random
import pathlib
# Load the flower_photos folder from the data folder in the parent directory.
# Fix: pathlib.path does not exist (AttributeError); the class is pathlib.Path.
data_path = pathlib.Path('../data/flower_photos')
# Collect every file matching <class_dir>/<image> under the dataset root.
all_image_paths = list(data_path.glob('*/*'))
# Convert each Path object to a plain string — this is the plain-for-loop form
# of the original list comprehension, and it must actually run: the
# tf.data.Dataset.from_tensor_slices call at the bottom needs string paths,
# not pathlib.Path objects.
image_path_strings = []
for path in all_image_paths:
    image_path_strings.append(str(path))
all_image_paths = image_path_strings
random.shuffle(all_image_paths)  # shuffle the samples in place
image_count = len(all_image_paths)
print('数据大小:', image_count)
# Look at 5 image paths
print('5张图片', all_image_paths[:5])
# Sorted names of the immediate subdirectories = the class names.
label_names = sorted(item.name for item in data_path.glob('*/') if item.is_dir())
print('分类名', label_names)
# Map each class name to an integer label.
label_to_index = dict((name, index) for index, name in enumerate(label_names))
print('标签', label_to_index)
# Pair every image path with the label of its parent directory (its class).
all_image_labels = [label_to_index[pathlib.Path(path).parent.name]
                    for path in all_image_paths]
for image, label in zip(all_image_paths[:5], all_image_labels[:5]):
    print(image, ' ---> ', label)
# Wrap the (path, label) pairs in a Dataset object.
ds = tf.data.Dataset.from_tensor_slices((all_image_paths, all_image_labels))