class AnchorCreator:
    """Assign training labels and regression targets to anchors (prior boxes).

    Anchors lying fully inside the image are matched against the ground-truth
    boxes by IoU and labelled 1 (positive), 0 (negative) or -1 (ignored).
    Positives and negatives are randomly subsampled to at most ``num_samples``
    in total, and the offsets from each anchor to its best-matching
    ground-truth box are computed.
    """

    def __init__(self, num_samples=256, positive_iou_thresh=0.7, negative_iou_thresh=0.3, positive_rate=0.5):
        """
        Initialise the anchor target creator.
        :param num_samples: total number of anchors per image kept for the classification/regression losses
        :param positive_iou_thresh: an anchor whose best IoU is >= this value is a positive sample
        :param negative_iou_thresh: an anchor whose best IoU is < this value is a negative sample
        :param positive_rate: target fraction of positive samples among ``num_samples``
        """
        self.num_samples = num_samples
        self.positive_iou_thresh = positive_iou_thresh
        self.negative_iou_thresh = negative_iou_thresh
        self.positive_rate = positive_rate

    @staticmethod
    def is_inside_anchors(anchors: Union[np.ndarray, Tensor], width: int, height: int) -> Union[np.ndarray, Tensor]:
        """
        Flag the anchors that lie completely inside the image.
        :param anchors: all generated anchors, columns are [x1, y1, x2, y2]
        :param width: input image width
        :param height: input image height
        :return: boolean mask with one entry per anchor, True where the anchor does not cross the image border
        """
        is_inside = (anchors[:, 0] >= 0) & (anchors[:, 1] >= 0) & (anchors[:, 2] <= width - 1) & (anchors[:, 3] <= height - 1)
        return is_inside

    @staticmethod
    def calc_IoU(anchors: np.ndarray, gt_boxes: np.ndarray, method=1) -> np.ndarray:
        """
        Compute the pairwise IoU between anchors and ground-truth boxes.

        The three ``method`` values are alternative implementations of the same
        formula. Box pairs whose union area is zero produce nan (0 / 0).
        :param anchors: [m, 4] array, columns are the top-left and bottom-right corners [x1, y1, x2, y2]
        :param gt_boxes: [n, 4] array of all ground-truth boxes of the image, columns are [x1, y1, x2, y2]
        :param method: 0 -> loop over ground-truth boxes, 1 -> expand both inputs with np.repeat, anything else -> broadcasting
        :return: [m, n] array holding the IoU of every anchor with every ground-truth box
        """
        # Validate the input shapes first.
        assert anchors.ndim == gt_boxes.ndim == 2, "anchors and ground truth bbox must be 2D array."
        assert anchors.shape[1] == gt_boxes.shape[1] == 4, "anchors and ground truth bbox must contain 4 values for 2 points."
        num_anchors, num_gts = anchors.shape[0], gt_boxes.shape[0]

        if method == 0:
            # Loop over the n ground-truth boxes and fill one IoU column per box.
            iou = np.zeros((num_anchors, num_gts))
            # The anchor areas do not depend on the ground-truth box, so compute them once.
            box_area = (anchors[:, 2] - anchors[:, 0]) * (anchors[:, 3] - anchors[:, 1])
            for idx, gt_box in enumerate(gt_boxes):
                gt_area = (gt_box[2] - gt_box[0]) * (gt_box[3] - gt_box[1])
                inter_w = np.minimum(anchors[:, 2], gt_box[2]) - np.maximum(anchors[:, 0], gt_box[0])
                inter_h = np.minimum(anchors[:, 3], gt_box[3]) - np.maximum(anchors[:, 1], gt_box[1])
                # Negative width/height means no overlap, so clamp to 0.
                inter = np.maximum(inter_w, 0) * np.maximum(inter_h, 0)
                union = box_area + gt_area - inter
                iou[:, idx] = inter / union
        elif method == 1:
            # Expand anchors [m, 4] and gt_boxes [n, 4] to [m, n, 4] so the IoU can be computed element-wise.
            anchors = np.repeat(anchors[:, np.newaxis, :], num_gts, axis=1)
            gt_boxes = np.repeat(gt_boxes[np.newaxis, :, :], num_anchors, axis=0)
            # Box areas, position by position.
            anchors_area = (anchors[:, :, 2] - anchors[:, :, 0]) * (anchors[:, :, 3] - anchors[:, :, 1])
            gt_boxes_area = (gt_boxes[:, :, 2] - gt_boxes[:, :, 0]) * (gt_boxes[:, :, 3] - gt_boxes[:, :, 1])
            # Width and height of the intersection.
            inter_w = np.minimum(anchors[:, :, 2], gt_boxes[:, :, 2]) - np.maximum(anchors[:, :, 0], gt_boxes[:, :, 0])
            inter_h = np.minimum(anchors[:, :, 3], gt_boxes[:, :, 3]) - np.maximum(anchors[:, :, 1], gt_boxes[:, :, 1])
            # Intersection over union.
            inter = np.maximum(inter_w, 0) * np.maximum(inter_h, 0)
            union = anchors_area + gt_boxes_area - inter
            iou = inter / union
        else:
            # Rely on broadcasting instead of loops or explicit repeats.
            # anchors[:, None, :2] is [m, 1, 2] and gt_boxes[:, :2] is [n, 2]; both broadcast to [m, n, 2].
            max_left_top = np.maximum(anchors[:, None, :2], gt_boxes[:, :2])
            # Same broadcasting for the smaller of the two bottom-right corners.
            min_right_bottom = np.minimum(anchors[:, None, 2:], gt_boxes[:, 2:])
            # The last axis of (min_right_bottom - max_left_top) holds the intersection width and height.
            # Their product is the area; the boolean mask zeroes pairs whose width or height is not positive.
            # np.prod is used because np.product was removed in NumPy 2.0.
            inter = np.prod(min_right_bottom - max_left_top, axis=2) * (max_left_top < min_right_bottom).all(axis=2)
            # Area of every anchor, shape [m].
            anchors_area = np.prod(anchors[:, 2:] - anchors[:, :2], axis=1)
            # Area of every ground-truth box, shape [n].
            gt_boxes_area = np.prod(gt_boxes[:, 2:] - gt_boxes[:, :2], axis=1)
            # [m, 1] + [n] broadcasts to [m, n], matching the shape of inter.
            union = anchors_area[:, None] + gt_boxes_area - inter
            iou = inter / union
        return iou

    @staticmethod
    def calc_max_iou_info(iou: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
        """
        Locate the maximum IoU along both axes of the IoU matrix.

        When the matrix is empty (no anchors or no ground-truth boxes) no
        anchor is the best match of any box, and every anchor is reported
        with ground-truth index 0 and a maximum IoU of 0.
        :param iou: [m, n] matrix, m anchors by n ground-truth boxes
        :return: for each column the row index of its maximum, for each row the column index of its maximum, and the maximum value of each row
        """
        if iou.size == 0:
            # argmax/max cannot reduce over an empty axis, so build the result by hand.
            num_anchors = iou.shape[0]
            return (
                np.zeros(0, dtype=np.intp),
                np.zeros(num_anchors, dtype=np.intp),
                np.zeros(num_anchors, dtype=iou.dtype),
            )
        # Per column: the anchor with the highest IoU for each ground-truth box, shape [n].
        max_iou_idx_anchor = np.argmax(iou, axis=0)
        # Per row: the ground-truth box with the highest IoU for each anchor, shape [m].
        max_iou_idx_gt = np.argmax(iou, axis=1)
        # Highest IoU of each anchor over all ground-truth boxes, shape [m].
        max_iou_values_anchor = np.max(iou, axis=1)
        return max_iou_idx_anchor, max_iou_idx_gt, max_iou_values_anchor

    def create_anchor_labels(self, anchors: np.ndarray, gt_boxes: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """
        Compute the IoU matrix and derive a label for every anchor from it.
        :param anchors: valid anchors, columns are [x1, y1, x2, y2]
        :param gt_boxes: ground-truth boxes, columns are [x1, y1, x2, y2]
        :return: index of the best-matching ground-truth box of each anchor, and the label of each anchor (1 positive, 0 negative, -1 ignored)
        """
        iou = self.calc_IoU(anchors=anchors, gt_boxes=gt_boxes)
        max_iou_idx_anchor, max_iou_idx_gt, max_iou_values_anchor = self.calc_max_iou_info(iou=iou)

        # Start with every anchor ignored (-1), then mark negatives (0) and positives (1).
        labels = np.full(anchors.shape[0], -1, dtype="int")
        # Best IoU below the negative threshold: the anchor overlaps no box enough -> negative.
        labels[max_iou_values_anchor < self.negative_iou_thresh] = 0
        # The anchor with the highest IoU for each ground-truth box is always positive.
        labels[max_iou_idx_anchor] = 1
        # Best IoU at or above the positive threshold -> positive.
        labels[max_iou_values_anchor >= self.positive_iou_thresh] = 1

        # Cap the number of positives by randomly resetting the surplus to -1.
        num_positive = int(self.num_samples * self.positive_rate)
        idx_positive = np.flatnonzero(labels == 1)
        if len(idx_positive) > num_positive:
            size_to_rest = len(idx_positive) - num_positive
            idx_to_reset = np.random.choice(a=idx_positive, size=size_to_rest, replace=False)
            labels[idx_to_reset] = -1

        # Negatives fill whatever is left of the sample budget; the surplus is reset to -1.
        num_negative = self.num_samples - np.sum(labels == 1)
        idx_negative = np.flatnonzero(labels == 0)
        if len(idx_negative) > num_negative:
            size_to_rest = len(idx_negative) - num_negative
            idx_to_reset = np.random.choice(a=idx_negative, size=size_to_rest, replace=False)
            labels[idx_to_reset] = -1

        return max_iou_idx_gt, labels

    @staticmethod
    def calc_offsets_from_bboxes(anchors: np.ndarray, target_boxes: np.ndarray, eps: float = 1e-5) -> np.ndarray:
        """
        Compute the offsets that map each anchor onto its target box.
        :param anchors: candidate boxes, columns are [x1, y1, x2, y2]
        :param target_boxes: target (ground-truth) boxes, one per anchor, columns are [x1, y1, x2, y2]
        :param eps: lower bound applied to the anchor width/height to avoid dividing by zero or a negative value
        :return: float32 offsets, columns are [dx, dy, dw, dh]
        """
        offsets = np.zeros_like(anchors, dtype="float32")
        # Anchor size (lower-bounded by eps) and centre.
        anchors_w = np.maximum(anchors[:, 2] - anchors[:, 0] + 1, eps)
        anchors_h = np.maximum(anchors[:, 3] - anchors[:, 1] + 1, eps)
        anchors_cx = 0.5 * (anchors[:, 2] + anchors[:, 0])
        anchors_cy = 0.5 * (anchors[:, 3] + anchors[:, 1])
        # Target size and centre.
        targets_w = target_boxes[:, 2] - target_boxes[:, 0] + 1
        targets_h = target_boxes[:, 3] - target_boxes[:, 1] + 1
        targets_cx = 0.5 * (target_boxes[:, 2] + target_boxes[:, 0])
        targets_cy = 0.5 * (target_boxes[:, 3] + target_boxes[:, 1])
        # Centre shifts are normalised by the anchor size; size changes are log ratios.
        offsets[:, 0] = (targets_cx - anchors_cx) / anchors_w
        offsets[:, 1] = (targets_cy - anchors_cy) / anchors_h
        offsets[:, 2] = np.log(targets_w / anchors_w)
        offsets[:, 3] = np.log(targets_h / anchors_h)
        return offsets

    def __call__(self, im_width: int, im_height: int, anchors: np.ndarray, gt_boxes: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """
        Label every anchor from its IoU with the ground-truth boxes and compute its regression offsets.

        Anchors crossing the image border keep label -1 and zero offsets. When
        ``gt_boxes`` is empty all offsets stay zero.
        :param im_width: input image width
        :param im_height: input image height
        :param anchors: all anchors of the image, columns are [x1, y1, x2, y2]
        :param gt_boxes: ground-truth boxes, columns are [x1, y1, x2, y2]
        :return: labels of all anchors and the offsets each anchor should regress
        """
        num_anchors = len(anchors)
        # Only anchors fully inside the image take part in labelling.
        is_inside = self.is_inside_anchors(anchors=anchors, width=im_width, height=im_height)
        inside_anchors = anchors[is_inside]
        # For the inside anchors: index of the best-matching ground-truth box, and the label.
        max_iou_idx_gt, inside_labels = self.create_anchor_labels(anchors=inside_anchors, gt_boxes=gt_boxes)

        # Labels and offsets were computed on inside_anchors only; map them back onto the full anchor set.
        # Every label starts at -1 and the inside anchors are overwritten.
        labels = np.full(num_anchors, -1.0)
        labels[is_inside] = inside_labels

        # Offsets start at 0. The buffer must be floating point: with integer anchors an
        # integer buffer would truncate the fractional offsets on assignment.
        offsets = np.zeros(anchors.shape, dtype=np.result_type(anchors.dtype, np.float32))
        if len(gt_boxes) > 0:
            # gt_boxes is [n, 4] and max_iou_idx_gt is [m]; indexing picks the
            # best-matching box of each inside anchor, giving the [m, 4] targets.
            target_boxes = gt_boxes[max_iou_idx_gt]
            offsets[is_inside] = self.calc_offsets_from_bboxes(anchors=inside_anchors, target_boxes=target_boxes)
        return labels, offsets