介绍

写这篇文章的起因是:下载 CIFAR-10 数据集后,得到的只有 readme 和若干 batch 文件,而不是可以直接查看的图片,于是查了些资料,自己整理了下面的转换脚本。

根据官方 readme 的描述,下载的数据包含 5 个 batch 的训练集和 1 个 batch 的测试集,每个 batch 有 10000 张图片,据此写了下面的代码。

目录结构

└─data
    ├─cifar-10-batches-py
    ├─test
    │  ├─0
    │  ├─1
    │  ├─2
    │  ├─3
    │  ├─4
    │  ├─5
    │  ├─6
    │  ├─7
    │  ├─8
    │  └─9
    └─train
        ├─0
        ├─1
        ├─2
        ├─3
        ├─4
        ├─5
        ├─6
        ├─7
        ├─8
        └─9

代码

import numpy as np
import pickle
import os
from torchvision import datasets
from imageio import imwrite
# Root directory that holds both the extracted archive and the exported images
data_save_pth = "./data"
train_pth = os.path.join(data_save_pth, "train")
test_pth = os.path.join(data_save_pth, "test")
# Create each output directory on its own. The previous check,
# `os.path.exists(train_pth and test_pth)`, only looked at test_pth
# (`a and b` evaluates to b for non-empty strings), so one directory could
# be left missing or makedirs could fail on the one that already existed.
os.makedirs(train_pth, exist_ok=True)
os.makedirs(test_pth, exist_ok=True)
# Location of the extracted CIFAR-10 python batches
base_dit = "./data"
data_dir = os.path.join(base_dit, "cifar-10-batches-py")
# Output roots for the exported PNG files (one sub-directory per label)
train_o_dir = train_pth
test_o_dir = test_pth

# Download the dataset
def DataDownload():
    """Download and extract CIFAR-10 through torchvision.

    The archive is placed under ``data_save_pth`` so that the extracted
    ``cifar-10-batches-py`` folder lands where ``data_dir`` expects it.
    Using ``train_pth`` as the root would put the batches inside the
    image output directory, where the export step does not look for them.
    """
    datasets.CIFAR10(root=data_save_pth, train=True, download=True)
# Create a directory
def my_mkdir(my_dir):
    """Create ``my_dir`` (and any missing parents) if it does not exist yet.

    Args:
        my_dir: path of the directory to create.

    Raises:
        FileExistsError: if ``my_dir`` exists but is not a directory.
    """
    # exist_ok=True removes the gap between "check" and "create" that the
    # exists()-then-makedirs() sequence had.
    os.makedirs(my_dir, exist_ok=True)
# Deserialize one pickled batch file
def unpickle(file):
    """Load and return the object pickled in ``file``.

    The file is opened in binary mode and decoded with ``encoding="bytes"``.

    NOTE: pickle can execute arbitrary code while loading; only use this on
    files from a trusted source.

    Args:
        file: path of the pickle file to read.

    Returns:
        The unpickled object.
    """
    with open(file, "rb") as handle:
        return pickle.load(handle, encoding="bytes")


def _export_batch(batch, out_root, index_offset=0):
    """Write every image of one unpickled batch to disk as a PNG file.

    Images are saved as ``<out_root>/<label>/<label>_<index>.png``.

    Args:
        batch: dict returned by ``unpickle``; read through its ``b'data'``
            (one flat row of 3*32*32 values per image) and ``b'labels'`` keys.
        out_root: directory that receives one sub-directory per label.
        index_offset: added to the in-batch position so that file names stay
            unique when several batches share the same ``out_root``.
    """
    created = set()
    for i, (row, raw_label) in enumerate(zip(batch[b'data'], batch[b'labels'])):
        # Each row is flat: restore (c, h, w), then move channels last
        # because imwrite expects (h, w, c).
        img = np.reshape(row, (3, 32, 32)).transpose(1, 2, 0)
        label = str(raw_label)
        o_dir = os.path.join(out_root, label)
        # Only touch the filesystem once per label directory.
        if o_dir not in created:
            my_mkdir(o_dir)
            created.add(o_dir)

        img_name = label + '_' + str(index_offset + i) + '.png'
        imwrite(os.path.join(o_dir, img_name), img)


if __name__ == '__main__':
    # Uncomment to fetch the archive first:
    # DataDownload()

    # Export steps:
    #   training set
    #     - walk over data_batch_1 .. data_batch_5
    #     - unpickle each file
    #     - take the b'data' rows and b'labels'
    #     - save every image with imageio's imwrite
    #   the test set is handled the same way

    # Training set
    offset = 0
    for j in range(1, 6):
        pth = os.path.join(data_dir, "data_batch_" + str(j))
        train_data = unpickle(pth)
        print(pth+"is loading...")
        _export_batch(train_data, train_o_dir, offset)
        # Continue numbering where this batch stopped instead of assuming
        # a fixed batch size.
        offset += len(train_data[b'labels'])
        print(pth + "loaded")

    # Test set: numbered from 0. The previous code reused the training loop
    # variable j here, which shifted every test file index by 40000.
    test_data_pth = os.path.join(data_dir, "test_batch")
    test_data = unpickle(test_data_pth)
    _export_batch(test_data, test_o_dir)
    print("test_batch  loaded")

如有错误,还请指正

Logo

有“AI”的1024 = 2048,欢迎大家加入2048 AI社区

更多推荐