import ast
import time

import cv2
import numpy as np
import onnxruntime
class SAFETY_DETECT:
    """Object detector backed by an ONNX Runtime inference session.

    The model is loaded with the CPU execution provider. Calling an instance
    with an image runs pre-processing, inference, confidence filtering and
    non-maximum suppression, and returns ``(boxes, scores, class_ids)``.

    The first four values of every prediction are treated as a
    ``(center_x, center_y, width, height)`` box and the remaining values as
    per-class scores.
    """

    def __init__(self, path, conf_thres=0.35, iou_thres=0.5):
        """Load the model at *path* and store the filtering thresholds.

        Args:
            path: Model file handed to ``onnxruntime.InferenceSession``.
            conf_thres: Predictions whose best class score is not strictly
                above this value are discarded.
            iou_thres: A box is suppressed when its IoU with an already kept
                box is not strictly below this value.
        """
        self.conf_threshold = conf_thres
        self.iou_threshold = iou_thres

        # Initialize model
        self.initialize_model(path)

        # One random color per class id, used by the draw_* helpers. The
        # palette keeps its historical minimum of 100 entries but grows with
        # the model so that class ids >= 100 no longer raise IndexError.
        palette_size = max(100, len(self.class_names))
        self.color_palette = [
            (np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255))
            for _ in range(palette_size)
        ]

    def __call__(self, image):
        """Shorthand for :meth:`detect_objects`."""
        return self.detect_objects(image)

    def initialize_model(self, path):
        """Create the inference session and cache class names and I/O details."""
        self.session = onnxruntime.InferenceSession(path, providers=['CPUExecutionProvider'])

        # SECURITY: the 'names' entry is read from the model file, which may
        # come from an untrusted source. It was previously passed to eval(),
        # which would execute any code embedded in the metadata. It is now
        # parsed strictly as a Python literal.
        raw_names = self.session.get_modelmeta().custom_metadata_map['names']
        self.class_names = self._parse_class_names(raw_names)

        # Get model info
        self.get_input_details()
        self.get_output_details()

    @staticmethod
    def _parse_class_names(raw_names):
        """Parse the class-name container stored as a Python literal string.

        Raises:
            ValueError, SyntaxError: If *raw_names* is not a plain literal.
        """
        return ast.literal_eval(raw_names)

    def detect_objects(self, image):
        """Run the full detection pipeline on *image*.

        The results are also stored on ``self.boxes``, ``self.scores`` and
        ``self.class_ids``.

        Returns:
            Tuple ``(boxes, scores, class_ids)`` as produced by
            :meth:`process_output`.
        """
        input_tensor, ratio = self.prepare_input(image)

        # Perform inference on the image
        outputs = self.inference(input_tensor)

        self.boxes, self.scores, self.class_ids = self.process_output(outputs, ratio)
        return self.boxes, self.scores, self.class_ids

    def prepare_input(self, image):
        """Turn *image* into the tensor fed to the model.

        The image is converted from BGR to RGB, letterboxed to the model input
        size, scaled to the 0..1 range and rearranged to a float32 tensor with
        a leading batch axis. The original height and width are remembered in
        ``self.img_height`` / ``self.img_width``.

        Returns:
            Tuple ``(input_tensor, ratio)`` where *ratio* is the factor that
            maps model-input coordinates back to the original image.
        """
        self.img_height, self.img_width = image.shape[:2]

        input_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Do not resize the picture directly: scale it proportionally and fill
        # the leftover area with a solid color.
        input_img, ratio = self.ratioresize(input_img)

        # Scale input pixel values to 0 to 1
        input_img = input_img / 255.0
        # Move the channel axis first, then add the batch axis.
        input_img = input_img.transpose(2, 0, 1)
        input_tensor = input_img[np.newaxis, :, :, :].astype(np.float32)

        return input_tensor, ratio

    def inference(self, input_tensor):
        """Run the session on *input_tensor* and return every model output."""
        return self.session.run(self.output_names, {self.input_names[0]: input_tensor})

    def process_output(self, output, ratio):
        """Filter raw model output into final detections.

        Args:
            output: Sequence of model outputs; only ``output[0]`` is used.
            ratio: Scale factor applied to the boxes (see :meth:`ratioresize`).

        Returns:
            Tuple ``(boxes, scores, class_ids)``. When nothing passes the
            confidence threshold, three empty lists are returned.
        """
        predictions = np.squeeze(output[0]).T

        # Filter out object confidence scores below threshold
        scores = np.max(predictions[:, 4:], axis=1)
        confident = scores > self.conf_threshold
        predictions = predictions[confident, :]
        scores = scores[confident]

        if len(scores) == 0:
            return [], [], []

        # Get the class with the highest confidence
        class_ids = np.argmax(predictions[:, 4:], axis=1)

        # Get bounding boxes for each object
        boxes = self.extract_boxes(predictions, ratio)

        # Apply non-maxima suppression to suppress weak, overlapping bounding boxes
        indices = self.nms(boxes, scores, self.iou_threshold)

        return boxes[indices], scores[indices], class_ids[indices]

    def extract_boxes(self, predictions, ratio):
        """Return the boxes of *predictions* scaled by *ratio* in xyxy format.

        *predictions* is left untouched: the scaling is done on a new array
        rather than in place on a view of the caller's data.
        """
        # Scale boxes to original image dimensions
        boxes = predictions[:, :4] * ratio

        # Convert boxes to xyxy format
        return self.xywh2xyxy(boxes)

    def rescale_boxes(self, boxes):
        """Rescale *boxes* from model-input size to original image size.

        Each coordinate is divided by the model input width/height and then
        multiplied by the original image width/height.
        """
        input_shape = np.array([self.input_width, self.input_height, self.input_width, self.input_height])
        boxes = np.divide(boxes, input_shape, dtype=np.float32)
        boxes *= np.array([self.img_width, self.img_height, self.img_width, self.img_height])

        return boxes

    def get_input_details(self):
        """Cache input names and the input height/width reported by the session."""
        model_inputs = self.session.get_inputs()
        self.input_names = [model_input.name for model_input in model_inputs]

        self.input_shape = model_inputs[0].shape
        self.input_height = self.input_shape[2]
        self.input_width = self.input_shape[3]

    def get_output_details(self):
        """Cache the output names reported by the session."""
        model_outputs = self.session.get_outputs()
        self.output_names = [model_output.name for model_output in model_outputs]

    # Resize the picture while keeping its aspect ratio
    def ratioresize(self, im, color=114):
        """Letterbox *im* into the model input size.

        The image is scaled by a single factor so that it fits inside
        ``(input_height, input_width)``, placed in the top-left corner of a
        canvas of that size, and the remaining area is filled with *color*.

        Returns:
            Tuple ``(padded_img, ratio)`` where *ratio* is ``1 / scale``, i.e.
            the factor that maps canvas coordinates back to *im*.
        """
        shape = im.shape[:2]
        new_h, new_w = self.input_height, self.input_width
        padded_img = np.full((new_h, new_w, 3), color, dtype=np.uint8)

        # Scale ratio (new / old)
        r = min(new_h / shape[0], new_w / shape[1])

        # Size of the scaled image, as (width, height) for cv2.resize
        new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))

        if shape[::-1] != new_unpad:
            im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)

        padded_img[: new_unpad[1], : new_unpad[0]] = im
        padded_img = np.ascontiguousarray(padded_img)
        return padded_img, 1 / r

    def nms(self, boxes, scores, iou_threshold):
        """Greedy non-maximum suppression over all boxes, regardless of class.

        Returns:
            List of indices into *boxes* of the kept boxes, ordered by
            descending score.
        """
        # Sort by score
        sorted_indices = np.argsort(scores)[::-1]

        keep_boxes = []
        while sorted_indices.size > 0:
            # Pick the remaining box with the highest score
            box_id = sorted_indices[0]
            keep_boxes.append(box_id)

            # Compute IoU of the picked box with the rest
            ious = self.compute_iou(boxes[box_id, :], boxes[sorted_indices[1:], :])

            # Remove boxes with IoU over the threshold
            keep_indices = np.where(ious < iou_threshold)[0]

            # +1 because the IoUs were computed against sorted_indices[1:]
            sorted_indices = sorted_indices[keep_indices + 1]

        return keep_boxes

    def compute_iou(self, box, boxes):
        """Return the IoU of one xyxy *box* against each row of *boxes*."""
        # Corners of the intersection rectangle
        xmin = np.maximum(box[0], boxes[:, 0])
        ymin = np.maximum(box[1], boxes[:, 1])
        xmax = np.minimum(box[2], boxes[:, 2])
        ymax = np.minimum(box[3], boxes[:, 3])

        # Compute intersection area (zero when the boxes do not overlap)
        intersection_area = np.maximum(0, xmax - xmin) * np.maximum(0, ymax - ymin)

        # Compute union area
        box_area = (box[2] - box[0]) * (box[3] - box[1])
        boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
        union_area = box_area + boxes_area - intersection_area

        # Compute IoU
        return intersection_area / union_area

    def xywh2xyxy(self, x):
        """Convert boxes from (x, y, w, h) to (x1, y1, x2, y2) on a copy of *x*."""
        y = np.copy(x)
        y[..., 0] = x[..., 0] - x[..., 2] / 2
        y[..., 1] = x[..., 1] - x[..., 3] / 2
        y[..., 2] = x[..., 0] + x[..., 2] / 2
        y[..., 3] = x[..., 1] + x[..., 3] / 2
        return y

    def draw_detections(self, image, boxes, scores, class_ids, mask_alpha=0.3):
        """Return a copy of *image* with masks, boxes and captions drawn on it.

        Font scale and text thickness are derived from the shorter image side.
        """
        det_img = image.copy()

        img_height, img_width = image.shape[:2]
        shorter_side = min(img_height, img_width)
        font_size = shorter_side * 0.0006
        text_thickness = int(shorter_side * 0.001)

        det_img = self.draw_masks(det_img, boxes, class_ids, mask_alpha)

        # Draw bounding boxes and labels of detections
        for class_id, box, score in zip(class_ids, boxes, scores):
            color = self.color_palette[class_id]

            self.draw_box(det_img, box, color)

            label = self.class_names[class_id]
            caption = f'{label} {int(score * 100)}%'
            self.draw_text(det_img, caption, box, color, font_size, text_thickness)

        return det_img

    def draw_box(self, image: np.ndarray, box: np.ndarray, color: tuple[int, int, int] = (0, 0, 255),
                 thickness: int = 2) -> np.ndarray:
        """Draw the outline of *box* on *image* and return the result of ``cv2.rectangle``."""
        x1, y1, x2, y2 = box.astype(int)
        return cv2.rectangle(image, (x1, y1), (x2, y2), color, thickness)

    def draw_text(self, image: np.ndarray, text: str, box: np.ndarray, color: tuple[int, int, int] = (0, 0, 255),
                  font_size: float = 0.001, text_thickness: int = 2) -> np.ndarray:
        """Draw *text* in white on a filled *color* background at the box's (x1, y1) corner."""
        x1, y1 = box.astype(int)[:2]
        (tw, th), _ = cv2.getTextSize(text=text, fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                                      fontScale=font_size, thickness=text_thickness)
        # Enlarge the text height by 20% for the background rectangle
        th = int(th * 1.2)

        cv2.rectangle(image, (x1, y1), (x1 + tw, y1 - th), color, -1)

        return cv2.putText(image, text, (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, font_size, (255, 255, 255),
                           text_thickness, cv2.LINE_AA)

    def draw_masks(self, image: np.ndarray, boxes: np.ndarray, classes: np.ndarray,
                   mask_alpha: float = 0.3) -> np.ndarray:
        """Blend filled, class-colored rectangles over *image* with weight *mask_alpha*."""
        mask_img = image.copy()

        # Fill every detection rectangle on the mask image
        for box, class_id in zip(boxes, classes):
            color = self.color_palette[class_id]

            x1, y1, x2, y2 = box.astype(int)

            # Draw fill rectangle in mask image
            cv2.rectangle(mask_img, (x1, y1), (x2, y2), color, -1)

        return cv2.addWeighted(mask_img, mask_alpha, image, 1 - mask_alpha, 0)