Posted on 2020-7-12 23:07:24
import torch
from torch.utils.data import Dataset
import torchvision
import numpy as np
import cfg
import os
from skimage.transform import resize
from PIL import Image
import math
import dataset
import glob
import time
LABEL_FILE_PATH = "data/person_label.txt"
IMG_BASE_DIR = "data"
IMG_PATH = "data/test"
transforms = torchvision.transforms.Compose([
torchvision.transforms.ToTensor()
])
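# Note: ToTensor() converts a PIL image (H x W x C, uint8 in 0-255) into a float
# tensor (C x H x W) scaled to [0, 1]; it is applied to the label image in
# TrainDataset.__getitem__ below.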
def one_hot(cls_num, v):
    # Build a one-hot vector of length cls_num with position v set to 1
    b = np.zeros(cls_num)
    b[v] = 1.
    return b
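# Illustrative example: with 10 classes,
# one_hot(10, 2) -> array([0., 0., 1., 0., 0., 0., 0., 0., 0., 0.])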
class TrainDataset(Dataset):
    def __init__(self, img_size=416):
        self.files = sorted(glob.glob('%s/*.jpg*' % IMG_PATH))
        # print(self.files)  # ['data/test\\1.jpg', 'data/test\\2.jpg', 'data/test\\3.jpg', 'data/test\\4.jpg']
        self.img_shape = (img_size, img_size)
        with open(LABEL_FILE_PATH) as f:
            self.dataset = f.readlines()  # open the label file and read it line by line

    def __len__(self):
        return len(self.files)
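    # Note: __len__ counts the image files, while the labels are taken from
    # person_label.txt by index, so the sorted file list and the label lines are
    # assumed to correspond one-to-one in the same order.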
    def __getitem__(self, index):
        img_path = self.files[index % len(self.files)]
        # images are processed one at a time
        # Extract image
        img = np.array(Image.open(img_path))
        h, w, _ = img.shape
        dim_diff = np.abs(h - w)
        # Upper (left) and lower (right) padding
        pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
        # Determine padding
        pad = ((pad1, pad2), (0, 0), (0, 0)) if h <= w else ((0, 0), (pad1, pad2), (0, 0))
        # Add padding
        input_img = np.pad(img, pad, 'constant', constant_values=127.5) / 255.
        # Resize and normalize
        input_img = resize(input_img, (*self.img_shape, 3), mode='reflect')
        # Scale the image to the size the dataset expects, normalizing at the same time
        # Channels-first
        input_img = np.transpose(input_img, (2, 0, 1))
        # As pytorch tensor
        input_img = torch.from_numpy(input_img).float()
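        # Worked example of the padding step: for a 375x500 (h x w) image,
        # dim_diff = 125, pad1 = 62, pad2 = 63, so 62 rows are added on top and
        # 63 on the bottom, giving a 500x500 square that is then resized to 416x416.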
        # -------------------------
        # Label
        # -------------------------
        labels = {}
        line = self.dataset[index]  # fetch the label line at this index
        print(line)
        strs = line.split()  # split the label line on whitespace
        a = os.path.join(IMG_BASE_DIR, strs[0])  # unused; the same join is done again on the next line
        _img_data = Image.open(os.path.join(IMG_BASE_DIR, strs[0]))  # element 0 is the image file name; open it from the data directory
        img_data = transforms(_img_data)
        # print(img_data.shape)
        # print(strs[2])
        _boxes = np.array([float(x) for x in strs[1:]])  # strs was a list of strings; convert the box values to a numpy array
        # print("__box: ", type(_boxes[0]))
        # _boxes = np.array(list(map(float, strs[1:])))
        boxes = np.split(_boxes, len(_boxes) // 5)  # each box takes 5 values: cls, cx, cy, w, h
        # print("adasdasd", type(boxes[0]))
        # print("boxes: ", boxes)
        for feature_size, anchors in cfg.ANCHORS_GROUP.items():  # feature_size is 13, 26 or 52; anchors are the (w, h) of the anchor boxes at that scale
            print("feature_size: ", feature_size)
            print("anchors: ", anchors)
            labels[feature_size] = np.zeros(shape=(feature_size, feature_size, 3, 5 + cfg.CLASS_NUM))
            # print("labels[feature_size]: ", labels[feature_size].shape)
            for box in boxes:
                cls, cx, cy, w, h = box  # each box parsed from the label: class, center x/y, ground-truth box width/height
                cx_offset, cx_index = math.modf(cx * feature_size / cfg.IMG_WIDTH)
                # print("cx_offset, cx_index ", cx_offset, cx_index)  # ========= x
                # cx_offset is the fractional part of the center's position on the feature map
                # cx_index is the integer part, i.e. the grid cell the center falls into
                cy_offset, cy_index = math.modf(cy * feature_size / cfg.IMG_WIDTH)  # ========= y
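                # Example: with IMG_WIDTH = 416, feature_size = 13 and cx = 180,
                # 180 * 13 / 416 = 5.625, so math.modf gives cx_offset = 0.625 and
                # cx_index = 5.0 -> the center lies 62.5% of the way into grid cell 5.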
                for i, anchor in enumerate(anchors):
                    anchor_area = cfg.ANCHORS_GROUP_AREA[feature_size][i]  # pre-computed area of anchor i at this feature size (13, 26 or 52)
                    p_w, p_h = w / anchor[0], h / anchor[1]  # ground-truth w/h divided by anchor w/h, i.e. the scale offsets
                    p_area = w * h  # area of the ground-truth box
                    iou = min(p_area, anchor_area) / max(p_area, anchor_area)  # "IoU" computed as smaller area over larger area
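                    # Note: this min/max area ratio is a simplified match score; it equals
                    # the true IoU only when one box completely contains the other (both
                    # boxes are effectively treated as sharing the same center).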
                    # The actual label assignment and return are commented out here for debugging;
                    # __getitem__ currently just returns 1.
                    # labels[feature_size][int(cy_index), int(cx_index), i] = np.array(
                    #     [iou, cx_offset, cy_offset, np.log(p_w), np.log(p_h), *one_hot(cfg.CLASS_NUM, int(cls))])
                    # # *one_hot(cfg.CLASS_NUM, int(cls)) is the classification part: CLASS_NUM is 10, so e.g. cls = 2 becomes a one-hot vector
        # print("ok====")
        # return labels[13], labels[26], labels[52], img_data  # every target is matched against 9 anchor boxes (3 shapes x 3 scales)
        return 1
# ==============================================================================================
class TestMyDataset(Dataset):
    def __init__(self, img_size=416):
        self.files = sorted(glob.glob('%s/*.jpg*' % IMG_PATH))
        print(self.files)  # ['data/test\\1.jpg', 'data/test\\2.jpg', 'data/test\\3.jpg', 'data/test\\4.jpg']
        self.img_shape = (img_size, img_size)
        #
        # img = Image.open(self.files[0])
        # img.show()
        # print("=======")
        # for filename in os.listdir(IMG_PATH):
        #     print(filename)
    def __getitem__(self, index):
        img_path = self.files[index % len(self.files)]
        # print(index)
        # print(img_path)
        # 0
        # data/test\1.jpg
        # 1
        # data/test\2.jpg
        # images are processed one at a time
        # Extract image
        img = np.array(Image.open(img_path))
        h, w, _ = img.shape
        dim_diff = np.abs(h - w)
        # Upper (left) and lower (right) padding
        pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
        # Determine padding
        pad = ((pad1, pad2), (0, 0), (0, 0)) if h <= w else ((0, 0), (pad1, pad2), (0, 0))
        # Add padding
        input_img = np.pad(img, pad, 'constant', constant_values=127.5) / 255.
        # Resize and normalize
        input_img = resize(input_img, (*self.img_shape, 3), mode='reflect')
        # Scale the image to the size the dataset expects, normalizing at the same time
        # Channels-first
        input_img = np.transpose(input_img, (2, 0, 1))
        # As pytorch tensor
        input_img = torch.from_numpy(input_img).float()
        return img_path, input_img

    def __len__(self):
        return len(self.files)
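# Typical use of TestMyDataset (sketch, mirroring the loop in __main__ below):
#   test_loader = torch.utils.data.DataLoader(TestMyDataset(), batch_size=2, shuffle=False)
#   for img_paths, input_imgs in test_loader:  # input_imgs: [B, 3, 416, 416]
#       ...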
if __name__ == '__main__':
    myDataset = dataset.TrainDataset()  # would return labels[13], labels[26], labels[52], img_data once the real return is restored
    train_loader = torch.utils.data.DataLoader(myDataset, batch_size=2, shuffle=False)
    for batch_i, (img_paths, input_imgs) in enumerate(train_loader):
        # print("input_imgs.shape", input_imgs.shape)  # torch.Size([2, 3, 416, 416])
        print("batch_i", batch_i)
        # The batch above is N C H W; that is why __getitem__(self, index) in TestMyDataset receives indices the way shown there:
        # even though a batch of two images is requested, the DataLoader still calls __getitem__ one image at a time.
        pass
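The cfg module is not included in the post. A minimal stand-in along these lines should be enough to run the listing; the anchor sizes are just the standard YOLOv3 values used as placeholders, not the actual config:

# cfg.py -- minimal stand-in so the listing above can run; values are illustrative
IMG_WIDTH = 416
CLASS_NUM = 10
# one group of 3 anchor boxes (w, h) per feature-map size
ANCHORS_GROUP = {
    13: [[116, 90], [156, 198], [373, 326]],
    26: [[30, 61], [62, 45], [59, 119]],
    52: [[10, 13], [16, 30], [33, 23]],
}
# anchor areas, indexed the same way the training loop expects
ANCHORS_GROUP_AREA = {
    size: [w * h for w, h in anchors]
    for size, anchors in ANCHORS_GROUP.items()
}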