python/pyTools.git

baoshiwei

2025-04-15 776a127b8db01cd4338f4db2a84ea567a65bff9f

rjuq

已添加12个文件

已修改3个文件

	camera_onnx.py	101 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	detect_onnx.py	220 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	onnx-test.py	50 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	onnx_predit.py	364 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	pachong.py	36 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	paherbbaidu.py	46 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	pc.py	35 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	quchong.py	15 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	replaceLabelNumber.py	38 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	save_img.py	29 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	shot_onnx.py	105 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	speech/deepSpeechTest.py	20 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	speech/formatmp3.py	5 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	speech/whisper2.py	1 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	speech/whisperTest.py	10 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史

 camera_onnx.py

对比新文件
@@ -0,0 +1,101 @@
import cv2
import time
import numpy as np
import onnxruntime
from scipy.special import softmax

# Live camera classification demo: grab frames, centre-crop them, run the
# ONNX classifier and print the five highest-scoring classes for every frame.

# Load the ONNX classification model
session = onnxruntime.InferenceSession("model/classify/s.onnx")
# Camera index; 0 is usually the first camera
camera_index = 0

# Open the camera (DirectShow backend)
cap = cv2.VideoCapture(camera_index, cv2.CAP_DSHOW)
# Request the capture resolution
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 3840)  # width
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 2160)  # height
# Check that the camera was opened successfully
if not cap.isOpened():
    print("æ æ³æå¼æåå¤´")
    exit()

# Read back the resolution reported by the capture device (returned as floats)
width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
print("æåå¤´åè¾¨ç:", width, "x", height)



# Read the class table from res1-2.json
# NOTE(review): eval() executes whatever the file contains; if the file is
# plain JSON, json.load() would be the safe replacement - confirm the format.
with open("res1-2.json", "r") as f:
    classes = eval(f.read())



# Size of the centre crop taken from every frame
target_width = 1024
target_height = 768

# Timer (only used by the disabled snapshot code below)
start_time = time.time()

# Read frames from the camera in a loop
while True:
    ret, frame = cap.read()

    if not ret:
        print("æ æ³è¯»åæåå¤´ç»é¢")
        break

    # Centre-crop a target_width x target_height region from the frame.
    # NOTE(review): the original comment mentioned a 640*480 crop of a
    # 1920*1080 image; the code uses the reported camera resolution and the
    # 1024x768 target above. If the camera delivers a smaller frame than the
    # target, the slice bounds go negative - confirm that cannot happen.
    cropped_frame = frame[int(height / 2 - target_height / 2):int(height / 2 + target_height / 2),
                    int(width / 2 - target_width / 2):int(width / 2 + target_width / 2)]
    # Resize the crop to the target size
    resized_frame = cv2.resize(cropped_frame, (target_width, target_height))

    # Current time
    current_time = time.time()

    # Disabled: save the current frame once the interval has elapsed
    # if current_time - start_time >= 3.0:
    #     # Build the file name from the current time
    #     save_name = time.strftime("%Y%m%d%H%M%S", time.localtime()) + ".jpg"
    #     # Save the frame
    #     cv2.imwrite(save_path + save_name, frame)
    #     print("saved image:", save_name)
    #     # Reset the timer
    #     start_time = time.time()

    # Pre-processing: scale to [0, 1], resize to 640x640, swap BGR -> RGB
    blob = cv2.dnn.blobFromImage(resized_frame, 1 / 255.0, (640, 640), swapRB=True, crop=False)

    # Model inference
    outputs = session.run(None, {session.get_inputs()[0].name: blob})

    # print(outputs)
    # NOTE(review): the original comment said "apply softmax", but the raw
    # model output is used as-is - presumably the model already outputs
    # probabilities; confirm.
    probabilities = outputs[0]

    # Class with the highest score
    predicted_class = np.argmax(probabilities, axis=1)[0]
    max_probability = np.max(probabilities, axis=1)[0]

    # Indices of the five highest-scoring classes, in ascending score order
    # (the variable name says "ten" but only five are taken)
    top_ten_classes = np.argsort(probabilities, axis=1)[0][-5:]

    # Print those classes
    print("Top 5 Classes:")
    for i in top_ten_classes:
        print(f"{classes[i]}: {probabilities[0][i]}")

    # Show the frame
    cv2.imshow("Camera", resized_frame)

    # Leave the loop when the 'q' key is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the camera
cap.release()

# Close all windows
cv2.destroyAllWindows()

 detect_onnx.py

对比新文件
@@ -0,0 +1,220 @@
import time
import cv2
import numpy as np
import onnxruntime


class YOLOv8:
    """Object detector that runs a detection model through ONNX Runtime.

    Calling an instance with a BGR image returns ``(boxes, scores,
    class_ids)``: boxes are ``[x1, y1, x2, y2]`` in the coordinates of the
    image that was passed in. When nothing passes the confidence threshold,
    three empty lists are returned.
    """

    def __init__(self, path, conf_thres=0.7, iou_thres=0.7):
        """Store the thresholds and load the model found at *path*."""
        self.conf_threshold = conf_thres
        self.iou_threshold = iou_thres

        # Initialize model
        self.initialize_model(path)

    def __call__(self, image):
        """Shortcut for :meth:`detect_objects`."""
        return self.detect_objects(image)

    def initialize_model(self, path):
        """Create the inference session and cache its input/output metadata."""
        self.session = onnxruntime.InferenceSession(path, providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
        # Get model info
        self.get_input_details()
        self.get_output_details()

    def detect_objects(self, image):
        """Run the full pipeline on *image* and return boxes, scores and class ids.

        The results are also kept on the instance as ``boxes``, ``scores``
        and ``class_ids``.
        """
        input_tensor, ratio = self.prepare_input(image)

        # Perform inference on the image
        outputs = self.inference(input_tensor)

        self.boxes, self.scores, self.class_ids = self.process_output(outputs, ratio)

        return self.boxes, self.scores, self.class_ids

    def prepare_input(self, image):
        """Convert a BGR image into the model's input tensor.

        Returns the ``(1, 3, H, W)`` float32 tensor and the factor that maps
        model-input coordinates back to coordinates of *image*.
        """
        self.img_height, self.img_width = image.shape[:2]

        input_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Do not stretch the image: scale it proportionally and pad the
        # remaining area with a solid colour.
        input_img, ratio = self.ratioresize(input_img)

        # Scale input pixel values to 0 to 1
        input_img = input_img / 255.0
        input_img = input_img.transpose(2, 0, 1)
        input_tensor = input_img[np.newaxis, :, :, :].astype(np.float32)

        return input_tensor, ratio

    def inference(self, input_tensor):
        """Run the session on *input_tensor* and return all model outputs."""
        return self.session.run(self.output_names, {self.input_names[0]: input_tensor})

    def process_output(self, output, ratio):
        """Turn raw model output into filtered boxes, scores and class ids.

        ``output[0]`` is squeezed and transposed so that each row is one
        candidate: four box values (cx, cy, w, h) followed by the per-class
        scores. Returns ``([], [], [])`` when no candidate exceeds the
        confidence threshold.
        """
        predictions = np.squeeze(output[0]).T

        # Filter out object confidence scores below threshold
        scores = np.max(predictions[:, 4:], axis=1)
        confident = scores > self.conf_threshold
        predictions = predictions[confident, :]
        scores = scores[confident]

        if len(scores) == 0:
            return [], [], []

        # Get the class with the highest confidence
        class_ids = np.argmax(predictions[:, 4:], axis=1)

        # Get bounding boxes for each object
        boxes = self.extract_boxes(predictions, ratio)

        # Apply non-maxima suppression to suppress weak, overlapping bounding boxes
        indices = self.nms(boxes, scores, self.iou_threshold)

        return boxes[indices], scores[indices], class_ids[indices]

    def extract_boxes(self, predictions, ratio):
        """Return the boxes of *predictions* as xyxy in original-image coordinates.

        *predictions* is left untouched; the scaling is done on a new array
        instead of in place on the caller's data.
        """
        # Scale boxes to original image dimensions (multiplication yields a
        # fresh array, so the first four columns of ``predictions`` are not
        # modified)
        boxes = predictions[:, :4] * ratio

        # Convert boxes to xyxy format
        return self.xywh2xyxy(boxes)

    def rescale_boxes(self, boxes):
        """Rescale *boxes* from model-input size to original image size.

        Not used by the current pipeline (letterbox scaling via ``ratio`` is
        used instead); kept for callers that stretch the image directly.
        """
        input_shape = np.array([self.input_width, self.input_height, self.input_width, self.input_height])
        boxes = np.divide(boxes, input_shape, dtype=np.float32)
        boxes *= np.array([self.img_width, self.img_height, self.img_width, self.img_height])

        return boxes

    def get_input_details(self):
        """Cache the input names and the input height/width of the model."""
        model_inputs = self.session.get_inputs()
        self.input_names = [model_input.name for model_input in model_inputs]

        # Axis 2 is read as height and axis 3 as width.
        # NOTE(review): assumes a fixed-size input; a model exported with
        # dynamic axes may report non-integer entries here - confirm.
        self.input_shape = model_inputs[0].shape
        self.input_height = self.input_shape[2]
        self.input_width = self.input_shape[3]

    def get_output_details(self):
        """Cache the output names of the model."""
        model_outputs = self.session.get_outputs()
        self.output_names = [model_output.name for model_output in model_outputs]

    def ratioresize(self, im, color=114):
        """Resize *im* proportionally into the model input size, padding with *color*.

        The resized image is placed in the top-left corner of the padded
        canvas. Returns the padded image and ``1 / r``, the factor that
        converts padded-image coordinates back to *im* coordinates.
        """
        shape = im.shape[:2]
        new_h, new_w = self.input_height, self.input_width
        padded_img = np.ones((new_h, new_w, 3), dtype=np.uint8) * color

        # Scale ratio (new / old)
        r = min(new_h / shape[0], new_w / shape[1])

        # Size of the image after scaling, as (width, height)
        new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))

        if shape[::-1] != new_unpad:
            im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)

        padded_img[: new_unpad[1], : new_unpad[0]] = im
        padded_img = np.ascontiguousarray(padded_img)
        return padded_img, 1 / r

    def nms(self, boxes, scores, iou_threshold):
        """Greedy non-maximum suppression; returns the list of kept box indices."""
        # Sort by score, highest first
        sorted_indices = np.argsort(scores)[::-1]

        keep_boxes = []
        while sorted_indices.size > 0:
            # Pick the remaining box with the highest score
            box_id = sorted_indices[0]
            keep_boxes.append(box_id)

            # Compute IoU of the picked box with the rest
            ious = self.compute_iou(boxes[box_id, :], boxes[sorted_indices[1:], :])

            # Keep only the boxes whose IoU is below the threshold
            keep_indices = np.where(ious < iou_threshold)[0]

            # +1 because ``ious`` was computed against ``sorted_indices[1:]``
            sorted_indices = sorted_indices[keep_indices + 1]

        return keep_boxes

    def compute_iou(self, box, boxes):
        """Return the IoU of xyxy *box* with every row of xyxy *boxes*."""
        # Corners of the intersection rectangles
        xmin = np.maximum(box[0], boxes[:, 0])
        ymin = np.maximum(box[1], boxes[:, 1])
        xmax = np.minimum(box[2], boxes[:, 2])
        ymax = np.minimum(box[3], boxes[:, 3])

        # Compute intersection area
        intersection_area = np.maximum(0, xmax - xmin) * np.maximum(0, ymax - ymin)

        # Compute union area
        box_area = (box[2] - box[0]) * (box[3] - box[1])
        boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
        union_area = box_area + boxes_area - intersection_area

        # Compute IoU
        return intersection_area / union_area

    def xywh2xyxy(self, x):
        """Convert boxes from (cx, cy, w, h) to (x1, y1, x2, y2); returns a copy."""
        y = np.copy(x)
        y[..., 0] = x[..., 0] - x[..., 2] / 2
        y[..., 1] = x[..., 1] - x[..., 3] / 2
        y[..., 2] = x[..., 0] + x[..., 2] / 2
        y[..., 3] = x[..., 1] + x[..., 3] / 2
        return y


if __name__ == "__main__":
    # Camera demo: run the detector on a centre crop of every frame and
    # print the raw detections.
    yolov8_detector = YOLOv8('model/detect/best.onnx', conf_thres=0.7, iou_thres=0.7)

    # Camera index; 0 is usually the first camera
    camera_index = 0

    # Open the camera (DirectShow backend)
    cap = cv2.VideoCapture(camera_index, cv2.CAP_DSHOW)
    # Request the capture resolution
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 3840)  # width
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 2160)  # height
    # Check that the camera was opened successfully
    if not cap.isOpened():
        print("æ æ³æå¼æåå¤´")
        exit()
    width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    print("æåå¤´åè¾¨ç:", width, "x", height)
    # Size of the centre crop taken from every frame
    target_width = 1024
    target_height = 768
    try:
        # Read frames from the camera in a loop; the loop only ends when a
        # frame cannot be read or the process is interrupted.
        while True:
            ret, frame = cap.read()

            if not ret:
                print("æ æ³è¯»åæåå¤´ç»é¢")
                break

            # Centre-crop a target_width x target_height region from the frame
            cropped_frame = frame[int(height / 2 - target_height / 2):int(height / 2 + target_height / 2),
                                  int(width / 2 - target_width / 2):int(width / 2 + target_width / 2)]
            # Resize the crop to the target size
            resized_frame = cv2.resize(cropped_frame, (target_width, target_height))
            boxes, scores, class_ids = yolov8_detector(resized_frame)
            print(boxes, scores, class_ids)
    finally:
        # Always give the camera back, including on Ctrl+C or an inference
        # error (the original never released it).
        cap.release()

 onnx-test.py

对比新文件
@@ -0,0 +1,50 @@
import cv2
import numpy as np
import onnxruntime
from scipy.special import softmax

# Single-image classification check: run the ONNX classifier on one image
# and print its five highest-scoring classes.

# Load the ONNX classification model
session = onnxruntime.InferenceSession("model/classify/best.onnx")

# Read the test image; cv2.imread returns None instead of raising when the
# file is missing or unreadable, so fail early with a clear message.
img = cv2.imread("D:\\temp\\15.jpg")
if img is None:
    raise SystemExit("Could not read image: D:\\temp\\15.jpg")

# Read the class table from res1-2.json
# NOTE(review): eval() executes whatever the file contains; if the file is
# plain JSON, json.load() would be the safe replacement - confirm the format.
with open("res1-2.json", "r") as f:
    classes = eval(f.read())

# Pre-processing: scale to [0, 1], resize to 640x640, swap BGR -> RGB
blob = cv2.dnn.blobFromImage(img, 1/255.0, (640, 640), swapRB=True, crop=False)

# Model inference
outputs = session.run(None, {session.get_inputs()[0].name: blob})

# print(outputs)
# The raw model output is used as the score vector (no softmax is applied
# here; presumably the model already outputs probabilities - confirm).
probabilities = outputs[0]

# Class with the highest score
predicted_class = np.argmax(probabilities, axis=1)[0]
max_probability = np.max(probabilities, axis=1)[0]

# Indices of the five highest-scoring classes, in ascending score order
top_ten_classes = np.argsort(probabilities, axis=1)[0][-5:]

# Print them; the label now matches the number of entries actually printed
# (it used to say "Top 10" while listing five).
print("Top 5 Classes:")
for i in top_ten_classes:
    print(f"{classes[i]}: {probabilities[0][i]}")



# Disabled detection post-processing draft, kept for reference
# for detection in outputs[0][0]:
#     confidence = detection[4]
#     if confidence > 0.5:
#         class_id = int(detection[5])
#         x, y, w, h = detection[:4]
#         cv2.rectangle(img, (int(x), int(y)), (int(x+w), int(y+h)), (0, 255, 0), 2)
#
# # Show the result
# cv2.imshow("YOLOv8 Detection", img)
# cv2.waitKey(0)

 onnx_predit.py

对比新文件
@@ -0,0 +1,364 @@
import time

import cv2
import onnxruntime as ort
from PIL import Image
import numpy as np

# Detection settings shared by the functions below.

# Minimum class score for a candidate to be kept
confidence_thres = 0.35
# IoU threshold used by non-maximum suppression
iou_thres = 0.5

# Class id -> label
classes = {0: 'herb'}
# One random drawing colour per class (values drawn from [100, 255))
color_palette = np.random.uniform(low=100, high=255, size=(len(classes), 3))

# Execution providers in order of preference: CUDA on device 0 (change
# device_id to pick another GPU), with the CPU provider as fallback.
_cuda_provider = ('CUDAExecutionProvider', {'device_id': 0})
providers = [_cuda_provider, 'CPUExecutionProvider']

def calculate_iou(box, other_boxes):
    """
    Compute the intersection-over-union (IoU) of one box with a set of boxes.

    Args:
        box: a single box in the form [x1, y1, width, height].
        other_boxes: sequence of boxes, each also [x1, y1, width, height].

    Returns:
        An array holding the IoU of ``box`` with every entry of
        ``other_boxes``.
    """
    others = np.array(other_boxes)
    other_x, other_y = others[:, 0], others[:, 1]
    other_w, other_h = others[:, 2], others[:, 3]
    box_x, box_y, box_w, box_h = box[0], box[1], box[2], box[3]

    # Top-left corner of each intersection rectangle
    left = np.maximum(box_x, other_x)
    top = np.maximum(box_y, other_y)
    # Bottom-right corner of each intersection rectangle
    right = np.minimum(box_x + box_w, other_x + other_w)
    bottom = np.minimum(box_y + box_h, other_y + other_h)

    # Overlap area; negative extents mean "no overlap" and are clamped to 0
    overlap = np.maximum(0, right - left) * np.maximum(0, bottom - top)

    # IoU = overlap / (area(box) + area(other) - overlap)
    union = box_w * box_h + other_w * other_h - overlap
    return overlap / union

def custom_NMSBoxes(boxes, scores, confidence_threshold, iou_threshold):
    """
    Greedy non-maximum suppression over [x1, y1, width, height] boxes.

    Args:
        boxes: sequence of boxes, each [x1, y1, width, height].
        scores: sequence of scores, one per box.
        confidence_threshold: boxes whose score is not strictly greater than
            this value are discarded before suppression.
        iou_threshold: a box is suppressed when its IoU with an already
            selected box is greater than this value.

    Returns:
        List of indices of the selected boxes, highest score first. The
        indices refer to positions in the ``boxes`` / ``scores`` arguments as
        passed in, so callers can use them to index their own lists.
        (Previously they referred to the confidence-filtered copy, which
        pointed at the wrong boxes whenever the filter dropped anything.)
    """
    # Nothing to do without boxes
    if len(boxes) == 0:
        return []
    scores = np.array(scores)
    boxes = np.array(boxes)
    # Positions, in the caller's ordering, of boxes passing the confidence filter
    candidates = np.where(scores > confidence_threshold)[0]
    if len(candidates) == 0:
        return []
    # Candidate positions ordered by descending score
    remaining = candidates[np.argsort(scores[candidates])[::-1]]
    indices = []
    while len(remaining) > 0:
        # Select the highest-scoring remaining box
        current_index = remaining[0]
        indices.append(int(current_index))
        # A single remaining box has nothing left to be compared with
        if len(remaining) == 1:
            break
        rest = remaining[1:]
        iou = calculate_iou(boxes[current_index], boxes[rest])
        # Keep only boxes that do not overlap the selected one too much
        remaining = rest[iou <= iou_threshold]
    return indices


def draw_detections(img, box, score, class_id):
    """
    Draw one detection (bounding box and label) onto the image, in place.

    Args:
        img: image to draw on.
        box: detected box as [x1, y1, width, height].
        score: detection score shown in the label.
        class_id: class id; selects the label text and the colour.

    Returns:
        None
    """
    # Box coordinates
    x1, y1, w, h = box
    # Colour assigned to this class
    color = color_palette[class_id]
    # Draw the bounding box
    cv2.rectangle(img, (int(x1), int(y1)), (int(x1 + w), int(y1 + h)), color, 2)
    # Label text: class name and score
    label = f'{classes[class_id]}: {score:.2f}'
    # Size of the rendered label text
    (label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
    # Label position: above the box when there is room, otherwise just below
    # its top edge. Cast to int like the box corners above, because OpenCV
    # drawing functions require integer pixel coordinates.
    label_x = int(x1)
    label_y = int(y1 - 10 if y1 - 10 > label_height else y1 + 10)
    # Filled rectangle as background for the label text
    cv2.rectangle(img, (label_x, label_y - label_height), (label_x + label_width, label_y + label_height), color, cv2.FILLED)
    # Label text
    cv2.putText(img, label, (label_x, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)


def preprocess(img, input_width, input_height):
    """
    Prepare an input image for inference.

    The image is converted from BGR to RGB, resized to
    ``(input_width, input_height)``, scaled to [0, 1], rearranged to
    channel-first layout and given a leading batch axis.

    Returns:
        (image_data, img_height, img_width): the float32 tensor ready for
        the model, plus the height and width of the image as passed in.
    """
    # Remember the size of the image before resizing
    img_height, img_width = img.shape[:2]

    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    resized = cv2.resize(rgb, (input_width, input_height))

    # Normalise, then move the channel axis to the front (HWC -> CHW)
    channel_first = np.transpose(np.array(resized) / 255.0, (2, 0, 1))
    # Add the batch axis and convert to float32
    image_data = np.expand_dims(channel_first, axis=0).astype(np.float32)

    return image_data, img_height, img_width

def postprocess(input_image, output, input_width, input_height, img_width, img_height):
    """
    Decode the model output and draw the surviving detections.

    Args:
        input_image (numpy.ndarray): image to draw on (modified in place).
        output (numpy.ndarray): model output.
        input_width (int): model input width.
        input_height (int): model input height.
        img_width (int): original image width.
        img_height (int): original image height.

    Returns:
        numpy.ndarray: ``input_image`` with the detections drawn on it.
    """
    # One row per candidate: 4 box values followed by the class scores
    outputs = np.transpose(np.squeeze(output[0]))

    # Factors mapping model-input coordinates back to the original image
    x_factor = img_width / input_width
    y_factor = img_height / input_height

    boxes, scores, class_ids = [], [], []
    for row in outputs:
        classes_scores = row[4:]
        max_score = np.amax(classes_scores)
        # Skip candidates whose best class score does not reach the threshold
        if not (max_score >= confidence_thres):
            continue
        class_id = np.argmax(classes_scores)
        # (x, y) is the box centre; convert to a scaled top-left/width/height box
        x, y, w, h = row[0], row[1], row[2], row[3]
        left = int((x - w / 2) * x_factor)
        top = int((y - h / 2) * y_factor)
        width = int(w * x_factor)
        height = int(h * y_factor)
        class_ids.append(class_id)
        scores.append(max_score)
        boxes.append([left, top, width, height])

    # Non-maximum suppression, then draw every selected detection
    for i in custom_NMSBoxes(boxes, scores, confidence_thres, iou_thres):
        draw_detections(input_image, boxes[i], scores[i], class_ids[i])

    return input_image

def init_detect_model(model_path):
    """
    Create the ONNX Runtime session for the detection model.

    Args:
        model_path: path of the ONNX model file.

    Returns:
        (session, model_inputs, input_width, input_height)
    """
    # Create the inference session with the configured execution providers
    session = ort.InferenceSession(model_path, providers=providers)
    # Input metadata of the model
    model_inputs = session.get_inputs()
    input_shape = model_inputs[0].shape
    # Axis 2 is the height and axis 3 the width (the two were swapped
    # before, which only went unnoticed for square inputs).
    # NOTE(review): assumes an (N, C, H, W) input with fixed integer sizes -
    # confirm against the exported model.
    input_height = input_shape[2]
    input_width = input_shape[3]
    return session, model_inputs, input_width, input_height

def detect_object(image, session, model_inputs, input_width, input_height):
    """
    Run detection on one image and return the image with detections drawn.

    Args:
        image: a PIL image or an array (anything else is used as-is).
        session: inference session used to run the model.
        model_inputs: input metadata of the model; the first entry's name is
            used as the feed key.
        input_width: model input width.
        input_height: model input height.

    Returns:
        The annotated image.
    """
    # PIL images are converted to an array; everything else is used directly.
    # NOTE(review): a PIL image is typically RGB while preprocess() converts
    # BGR -> RGB, so PIL input may reach the model with swapped channels -
    # confirm with the callers.
    result_image = np.array(image) if isinstance(image, Image.Image) else image

    # Resize / normalise the image for the model
    img_data, img_height, img_width = preprocess(result_image, input_width, input_height)

    # Run the model
    feed = {model_inputs[0].name: img_data}
    outputs = session.run(None, feed)

    # Decode the boxes, filter them and draw them on the image
    return postprocess(result_image, outputs, input_width, input_height, img_width, img_height)
if __name__ == '__main__':
    # Path of the model file
    model_path = 'model/detect/best.onnx'
    # Load the detection model and get its input metadata and input width/height
    session, model_inputs, input_width, input_height = init_detect_model(model_path)
    # Three modes: 1 = predict one image and show the result; 2 = camera
    # detection with a live FPS display; 3 = video detection, saving the
    # annotated video
    mode = 2
    if mode == 1:
        # Read the image file
        image_data = cv2.imread("street.jpg")
        # Run object detection on the image
        result_image = detect_object(image_data, session, model_inputs, input_width, input_height)
        # Save the annotated image to a file
        cv2.imwrite("output_image.jpg", result_image)
        # Show the annotated image in a window
        cv2.imshow('Output', result_image)
        # Wait for a key press
        cv2.waitKey(0)
    elif mode == 2:
        # Camera index; 0 is usually the first camera
        camera_index = 0

        # Open the camera (DirectShow backend)
        cap = cv2.VideoCapture(camera_index, cv2.CAP_DSHOW)
        # Request the capture resolution
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 3840)  # width
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 2160)  # height
        # Check that the camera was opened successfully
        if not cap.isOpened():
            print("æ æ³æå¼æåå¤´")
            exit()
        width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
        height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
        print("æåå¤´åè¾¨ç:", width, "x", height)


        # Initialise the frame counter and the start time
        frame_count = 0
        start_time = time.time()

        # Size of the centre crop taken from every frame
        target_width = 1024
        target_height = 768



        # Read the camera stream in a loop
        while True:
            # Read one frame
            ret, frame = cap.read()
            # Check that the frame was read successfully
            if not ret:
                print("Error: Could not read frame.")
                break
            # Centre-crop a target_width x target_height region from the frame
            # (the original comment mentioned 640*480 out of 1920*1080, which
            # does not match the values used here)
            cropped_frame = frame[int(height / 2 - target_height / 2):int(height / 2 + target_height / 2),
                                int(width / 2 - target_width / 2):int(width / 2 + target_width / 2)]
            # Resize the crop to the target size
            resized_frame = cv2.resize(cropped_frame, (target_width, target_height))

            # Run object detection on the frame
            output_image = detect_object(resized_frame, session, model_inputs, input_width, input_height)
            # Compute the average frame rate since the loop started
            # NOTE(review): unlike mode 3, elapsed_time is not checked for
            # zero before the division
            frame_count += 1
            end_time = time.time()
            elapsed_time = end_time - start_time
            fps = frame_count / elapsed_time
            print(f"FPS: {fps:.2f}")
            # Draw the FPS on the image
            cv2.putText(output_image, f"FPS: {fps:.2f}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
            # Show the current frame in a window
            cv2.imshow("Video", output_image)
            # Leave the loop when 'q' is pressed
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        # Release the camera
        cap.release()
        # Close the windows
        cv2.destroyAllWindows()
    elif mode == 3:
        # Input video path
        input_video_path = 'kun.mp4'
        # Output video path
        output_video_path = 'kun_det.mp4'
        # Open the video file
        cap = cv2.VideoCapture(input_video_path)
        # Check that the video was opened successfully
        if not cap.isOpened():
            print("Error: Could not open video.")
            exit()
        # Read the basic properties of the video
        # (property ids 3 and 4 are used here for frame width and height)
        frame_width = int(cap.get(3))
        frame_height = int(cap.get(4))
        fps = cap.get(cv2.CAP_PROP_FPS)
        # Define the video codec and create the VideoWriter
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # pick a codec that suits the file extension
        out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))
        # Initialise the frame counter and the start time
        frame_count = 0
        start_time = time.time()
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Info: End of video file.")
                break
            # Run object detection on the frame
            output_image = detect_object(frame, session, model_inputs, input_width, input_height)
            # Compute and print the processing frame rate (this reuses and
            # overwrites the `fps` name read from the video above, after the
            # writer has already been created with it)
            frame_count += 1
            end_time = time.time()
            elapsed_time = end_time - start_time
            if elapsed_time > 0:
                fps = frame_count / elapsed_time
                print(f"FPS: {fps:.2f}")
            # Write the processed frame to the output video
            out.write(output_image)
            # (Optional) show the processed frame live
            cv2.imshow("Output Video", output_image)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        # Release resources
        cap.release()
        out.release()
        cv2.destroyAllWindows()
    else:
        # Unknown mode value
        print("è¾å¥éè¯¯ï¼è¯·æ£æ¥modeçèµå¼")

 pachong.py

@@ -1,13 +1,20 @@
import concurrent.futures
import os
import time
import requests
import re


def imgdata_set(save_path, word, epoch):
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    else:
        return 0
    q = 0     # åæ¢ç¬åå¾çæ¡ä»¶
    a = 0     # å¾çåç§°
    while(True):
        time.sleep(1)
        print("å¼å§ç¬åå¾ç")
        url = "https://image.baidu.com/search/flip?tn=baiduimage&ie=utf-8&word={}&pn={}&ct=&ic=0&lm=-1&width=0&height=0".format(word, q)
        # word=éè¦æç´¢çåå
        headers = {
@@ -16,21 +23,28 @@
        response = requests.get(url, headers=headers)  # åéè¯·æ±è·åååº
        html = response.text  # è·åååºçHTMLåå®¹
        urls = re.findall('"objURL":"(.*?)"', html)  # ä½¿ç¨æ£åè¡¨è¾¾å¼æåå¾çURL
        for url in urls:
            try:
                print(a)  # å¾ççåå
                response = requests.get(url, headers=headers)  # åéè¯·æ±è·åå¾çååºE:\yaocai\juhua
                image = response.content  # è·åå¾çåå®¹
                with open(os.path.join(save_path, "{}.jpg".format(a)), 'wb') as f:  # å°å¾çåå®¹ä¿åå°æå®è·¯å¾
                    f.write(image)
                a = a + 1
            except Exception as e:
                pass
            continue
        print(len(urls))
        # ä½¿ç¨concurrent.futureså®ç°å¹¶åä¸è½½
        with concurrent.futures.ThreadPoolExecutor( max_workers=10) as executor:
            # æäº¤ææä¸è½½ä»»å¡å¹¶æ¶éfutureå¯¹è±¡
            futures = [executor.submit(download_image, index, headers,save_path,url ) for index,url in enumerate(urls)]
        q = q + 20
        if (q / 20) >= int(epoch):
            break


def download_image(a, headers, save_path, url):
    """Download one image and store it as ``<a>.jpg`` inside *save_path*.

    Args:
        a: value used as the file name (without extension); printed as progress.
        headers: HTTP headers sent with the request.
        save_path: directory the image is written to.
        url: address of the image.

    Returns:
        None. Download and write failures are reported on stdout and are not
        raised, so one bad URL does not stop the other downloads.
    """
    try:
        print(a)  # name of the image being fetched
        # The timeout keeps a server that never answers from blocking a worker forever
        response = requests.get(url, headers=headers, timeout=10)
        image = response.content  # raw image bytes
        with open(os.path.join(save_path, "{}.jpg".format(a)), 'wb') as f:
            f.write(image)
    except (requests.RequestException, OSError) as e:
        # Best effort: report the failure instead of silently discarding it
        print("download failed for {} ({}): {}".format(a, url, e))


if __name__ == "__main__":
    save_path = input('ä½ æ³ä¿åçè·¯å¾ï¼')  # è¯¢é®ç¨æ·ä¿åè·¯å¾
    word = input('ä½ æ³è¦ä¸è½½ä»ä¹å¾çï¼è¯·è¾å¥:')  # è¯¢é®ç¨æ·æç´¢å³é®è¯

 paherbbaidu.py

对比新文件
@@ -0,0 +1,46 @@
import mysql.connector
from mysql.connector import Error
from pachong import imgdata_set
from quchong import quchongmethod
import shutil


# è¿æ¥mysql æ¥è¯¢æ°æ®åº
def mysql_connect(host='localhost', database='herb', user='root', password='123456'):
    """Open a MySQL connection.

    Args:
        host: server host name.
        database: database (schema) to use.
        user: login name.
        password: login password.

    The defaults reproduce the previously hard-coded settings, so
    ``mysql_connect()`` behaves as before.
    NOTE(review): the default password is stored in source code; consider
    supplying it from configuration instead.

    Returns:
        The open connection, or ``None`` when connecting failed (the error
        is printed).
    """
    try:
        conn = mysql.connector.connect(host=host,
                                       database=database,
                                       user=user,
                                       password=password)
        if conn.is_connected():
            print('Connected to MySQL database')
            return conn
    except Error as e:
        print(e)
    # Reached when connecting raised or the connection is not usable
    return None

# æ¥è¯¢dry_herb_infoè¡¨
def mysql_select(conn):
    """Return ``(name, pinyin)`` rows from ``dry_herb_info`` with a non-empty pinyin.

    Args:
        conn: open database connection providing ``cursor()``.

    Returns:
        All matching rows, as returned by the cursor's ``fetchall()``.
    """
    cursor = conn.cursor()
    try:
        sql = "SELECT name, pinyin FROM dry_herb_info where pinyin is not null and pinyin <> ''"
        cursor.execute(sql)
        return cursor.fetchall()
    finally:
        # Release the cursor even when the query fails
        cursor.close()


# main
if __name__ == '__main__':
    # For every herb in the database: crawl images for it, then move the
    # detected duplicates into a temporary folder and delete that folder.
    conn = mysql_connect()
    # mysql_connect() returns None when the connection could not be opened
    if conn is None:
        raise SystemExit('Could not connect to MySQL; aborting.')

    try:
        result = mysql_select(conn)
    finally:
        # All rows are already fetched, so the connection is no longer needed
        conn.close()

    for row in result:
        name = row[0]
        pinyin = row[1]
        print(name)
        print(pinyin)
        # Crawl images into a folder named after the pinyin (2 result pages)
        imgdata_set('E:/pachong/'+pinyin, name + 'é¥®ç', 2)
        # Move duplicate images into the temporary folder
        quchongmethod('E:/pachong/'+pinyin, 'E:/pachong/2/'+pinyin)
        # Delete the temporary folder (and with it the duplicates)
        shutil.rmtree('E:/pachong/2/'+pinyin)


 pc.py

对比新文件
@@ -0,0 +1,35 @@
import os
import requests
from bs4 import BeautifulSoup
from PIL import Image
from io import BytesIO


def fetch_images(keyword, save_path):
    """Download the images found on a Baidu image search page for *keyword*.

    Args:
        keyword: search term; also used as the file-name prefix.
        save_path: directory the images are saved to (created if missing).

    Each image is saved as ``<keyword>_<index>.jpg``. A failure for one
    image is printed and the remaining images are still processed.
    """
    os.makedirs(save_path, exist_ok=True)

    url = f"https://image.baidu.com/search/flip?tn=baiduimage&ie=utf-8&word={keyword}"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }

    # Timeouts keep an unresponsive server from hanging the script forever
    response = requests.get(url, headers=headers, timeout=10)
    soup = BeautifulSoup(response.text, 'html.parser')
    # NOTE(review): this looks for <ObjURL class="obj"> elements; the sibling
    # crawler in pachong.py extracts "objURL" values from the page text with
    # a regular expression instead - confirm this selector matches anything.
    img_tags = soup.find_all('ObjURL', class_='obj')

    for i, img_tag in enumerate(img_tags):
        try:
            img_url = img_tag['data-src']
            img_data = requests.get(img_url, timeout=10).content
            img = Image.open(BytesIO(img_data))
            # JPEG cannot store palette or alpha images; convert them first
            if img.mode != 'RGB':
                img = img.convert('RGB')
            img.save(os.path.join(save_path, f"{keyword}_{i}.jpg"))
            print(f"Downloaded {keyword}_{i}.jpg")
        except Exception as e:
            print(f"Failed to download image: {e}")


if __name__ == "__main__":
    # Fetch search results for the hard-coded keyword into ./images
    fetch_images(keyword="äººå", save_path="images")

 quchong.py

@@ -52,12 +52,9 @@
                result = "ä¸¤å¼ å¾ç¸å"
    return result

if __name__ == '__main__':

    load_path = 'E:\yaocai\yinyanghuo'  # è¦å»éçæä»¶å¤¹
    save_path = 'E:\yaocai\\2\\yinyanghuo'  # ç©ºæä»¶å¤¹ï¼ç¨äºåå¨æ£æµå°çéå¤çç§ç
def quchongmethod(load_path, save_path):
    os.makedirs(save_path, exist_ok=True)

    # è·åå¾çåè¡¨ file_mapï¼åå¸{æä»¶è·¯å¾filename : æä»¶å¤§å°image_size}
    file_map = {}
    image_size = 0
@@ -70,13 +67,11 @@
            # print('the full name of the file is %s' % os.path.join(parent, filename))
            image_size = os.path.getsize(os.path.join(parent, filename))
            file_map.setdefault(os.path.join(parent, filename), image_size)

    # è·åçå¾çåè¡¨æ æä»¶å¤§å°image_size æåº
    file_map = sorted(file_map.items(), key=lambda d: d[1], reverse=False)
    file_list = []
    for filename, image_size in file_map:
        file_list.append(filename)

    # ååºéå¤çå¾ç
    file_repeat = []
    for currIndex, filename in enumerate(file_list):
@@ -101,8 +96,14 @@
        currIndex += 1
        if currIndex >= (len(file_list)-2):
            break

    # å°éå¤çå¾çç§»å¨å°æ°çæä»¶å¤¹ï¼å®ç°å¯¹åæä»¶å¤¹éé
    for image in file_repeat:
        shutil.move(image, save_path)
        print("æ£å¨ç§»é¤éå¤ç§çï¼", image)


if __name__ == '__main__':
    # Raw strings give the same paths as before without relying on the
    # invalid "\y" escape sequence, which newer Python versions warn about.
    load_path = r'E:\yaocai\chenpi'  # folder to de-duplicate
    save_path = r'E:\yaocai\2\chenpi'  # empty folder that receives the detected duplicates
    quchongmethod(load_path, save_path)

 replaceLabelNumber.py

对比新文件
@@ -0,0 +1,38 @@
import os

def process_file(file_path):
    """Increment the leading integer of every line of a text file, in place.

    Each line whose first whitespace-separated token parses as an integer gets
    that token replaced by ``token + 1``; the rest of the line (leading
    whitespace, remaining fields, line ending) is kept verbatim.  Blank lines
    and lines whose first token is not an integer are written back unchanged.

    All output lines are computed before the file is reopened for writing, so
    an unexpected line can no longer abort the function after the file has
    already been truncated.

    Args:
        file_path: Path of the text file to rewrite.
    """
    with open(file_path, 'r') as file:
        lines = file.readlines()

    updated_lines = []
    for line in lines:
        tokens = line.split(None, 1)
        try:
            number = int(tokens[0])
        except (IndexError, ValueError):
            # IndexError: blank line (no tokens); ValueError: first token is
            # not an integer.  Either way the line is kept as it is.
            updated_lines.append(line)
            continue

        first_token = tokens[0]
        # Only whitespace can precede the first token, so index() finds the
        # token itself and never an identical substring later in the line.
        start = line.index(first_token)
        end = start + len(first_token)
        updated_lines.append(line[:start] + str(number + 1) + line[end:])

    with open(file_path, 'w') as file:
        file.writelines(updated_lines)

if __name__ == "__main__":
    # Target directory
    target_directory = 'E:\\herb_scan.v1i.yolov8\\valid\\labels'

    # Collect every .txt file in the directory
    txt_files = [file for file in os.listdir(target_directory) if file.endswith('.txt')]

    # Process each file
    for txt_file in txt_files:
        file_path = os.path.join(target_directory, txt_file)
        process_file(file_path)

    print("å¤çå®æï¼")

 save_img.py

@@ -6,12 +6,17 @@

# æå¼æåå¤´
cap = cv2.VideoCapture(camera_index)

# è®¾ç½®åè¾¨ç
# cap.set(cv2.CAP_PROP_FRAME_WIDTH, 3840)  # å®½åº¦
# cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 2160)  # é«åº¦
# æ£æ¥æåå¤´æ¯å¦æåæå¼
if not cap.isOpened():
    print("æ æ³æå¼æåå¤´")
    exit()

width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
print("æåå¤´åè¾¨ç:", width, "x", height)
# å¾çä¿åè·¯å¾
save_path = "captured_images/"

@@ -36,6 +41,8 @@
        print("æ æ³è¯»åæåå¤´ç»é¢")
        break

# è£åªå¾å
#     cropped_frame = frame[750:1230, 1650:2290]
    # è°æ´å¾åå°ºå¯¸
    resized_frame = cv2.resize(frame, (target_width, target_height))

@@ -43,20 +50,20 @@
    current_time = time.time()

    # å¦æè·ç¦»ä¸ä¸æ¬¡ä¿åå·²ç»è¿å»1ç§ï¼åä¿åå½åç»é¢
    if current_time - start_time >= 1.0:
        # çæä¿åæä»¶åï¼ä»¥å½åæ¶é´å½å
        save_name = time.strftime("%Y%m%d%H%M%S", time.localtime()) + ".jpg"
        # ä¿åè°æ´å°ºå¯¸åçå¾ç
        cv2.imwrite(save_path + save_name, resized_frame)
        print("ä¿åå¾ç:", save_name)
        # éç½®è®¡æ¶å¨
        start_time = time.time()
    # if current_time - start_time >= 3.0:
    #     # çæä¿åæä»¶åï¼ä»¥å½åæ¶é´å½å
    #     save_name = time.strftime("%Y%m%d%H%M%S", time.localtime()) + ".jpg"
    #     # ä¿åè°æ´å°ºå¯¸åçå¾ç
    #     cv2.imwrite(save_path + save_name, frame)
    #     print("ä¿åå¾ç:", save_name)
    #     # éç½®è®¡æ¶å¨
    #     start_time = time.time()

    # æ¾ç¤ºç»é¢
    cv2.imshow("Camera", resized_frame)
    cv2.imshow("Camera", frame)

    # æ£æµæé®ï¼å¦ææä¸qé®åéåºå¾ªç¯
    if cv2.waitKey(1000) & 0xFF == ord('q'):
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# å³éæåå¤´

 shot_onnx.py

¶Ô±ÈÐÂÎÄ¼þ
@@ -0,0 +1,105 @@
import cv2
import time
import numpy as np
import onnxruntime
from scipy.special import softmax

# Load the ONNX model
session = onnxruntime.InferenceSession("model/detect/best.onnx")
# Camera index; 0 usually refers to the first camera
camera_index = 0

# æå¼æåå¤´
cap = cv2.VideoCapture(camera_index, cv2.CAP_DSHOW)
# è®¾ç½®åè¾¨ç
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 3840)  # å®½åº¦
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 2160)  # é«åº¦
# æ£æ¥æåå¤´æ¯å¦æåæå¼
if not cap.isOpened():
    print("æ æ³æå¼æåå¤´")
    exit()

width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
print("æåå¤´åè¾¨ç:", width, "x", height)



# ä»res.jsonä¸è¯»åç±»å«
# with open("res1-2.json", "r") as f:
#     classes = eval(f.read())



# ç®æ å¾åå°ºå¯¸
target_width = 1024
target_height = 768

# è®¡æ¶å¨
start_time = time.time()

# å¾ªç¯è¯»åæåå¤´ç»é¢
while True:
    ret, frame = cap.read()

    if not ret:
        print("æ æ³è¯»åæåå¤´ç»é¢")
        break

    # Crop a target_width x target_height (1024x768) window from the centre of the captured frame
    cropped_frame = frame[int(height / 2 - target_height / 2):int(height / 2 + target_height / 2),
                    int(width / 2 - target_width / 2):int(width / 2 + target_width / 2)]
    # è°æ´å¾åå°ºå¯¸
    resized_frame = cv2.resize(cropped_frame, (target_width, target_height))

    # è·åå½åæ¶é´
    current_time = time.time()

    #å¦æè·ç¦»ä¸ä¸æ¬¡ä¿åå·²ç»è¿å»1ç§ï¼åä¿åå½åç»é¢
    # if current_time - start_time >= 3.0:
    #     # çæä¿åæä»¶åï¼ä»¥å½åæ¶é´å½å
    #     save_name = time.strftime("%Y%m%d%H%M%S", time.localtime()) + ".jpg"
    #     # ä¿åè°æ´å°ºå¯¸åçå¾ç
    #     cv2.imwrite(save_path + save_name, frame)
    #     print("ä¿åå¾ç:", save_name)
    #     # éç½®è®¡æ¶å¨
    #     start_time = time.time()

    # é¢å¤ç
    blob = cv2.dnn.blobFromImage(resized_frame, 1 / 255.0, (640, 640), swapRB=True, crop=False)

    # æ¨¡åæ¨ç
    outputs = session.run(None, {session.get_inputs()[0].name: blob})

    output = np.transpose(np.squeeze(outputs[0]));
    rows = output.shape[0]
    boxes = []
    scores = []
    class_ids = []
    for i in range(rows):
        classes_scores = output[i][4:]
        max_score = np.amax(classes_scores)
        if max_score > 0.5:
            classid = np.argmax(classes_scores)
            scores.append(max_score)
            class_ids.append(classid)

    print(class_ids)
    print(scores)
    # # è¾åºååä¸ªç±»å«
    # print("Top 5 Classes:")
    # for i in top_ten_classes:
    #     print(f"{classes[i]}: {probabilities[0][i]}")

    # æ¾ç¤ºç»é¢
    cv2.imshow("Camera", resized_frame)

    # æ£æµæé®ï¼å¦ææä¸qé®åéåºå¾ªç¯
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# å³éæåå¤´
cap.release()

# å³éææçªå£
cv2.destroyAllWindows()

 speech/deepSpeechTest.py

¶Ô±ÈÐÂÎÄ¼þ
@@ -0,0 +1,20 @@
from deepspeech import Model
import scipy.io.wavfile as wav
import numpy as np

# Paths of the acoustic model and of the external scorer file
model_path = 'model/deepspeech-0.9.3-models-zh-CN.pbmm'
scorer_path = 'model/deepspeech-0.9.3-models-zh-CN.scorer'

# Initialise the DeepSpeech model and attach the external scorer
ds = Model(model_path)
ds.enableExternalScorer(scorer_path)

# Load the audio file
audio_path = 'audio/input.wav'
fs, audio = wav.read(audio_path)

# The samples are handed to stt() unchanged, so warn (without aborting) when
# the file does not look like what the model expects.
# NOTE(review): the DeepSpeech docs describe the stt() input as 16-bit mono
# audio at the model's sample rate - confirm against the installed version.
expected_fs = ds.sampleRate()
if fs != expected_fs:
    print('Warning: audio sample rate', fs, 'Hz differs from the model sample rate', expected_fs, 'Hz')
if audio.ndim != 1 or audio.dtype != np.int16:
    print('Warning: expected 16-bit mono samples, got dtype', audio.dtype, 'with shape', audio.shape)

# Run speech-to-text
text = ds.stt(audio)

print('Transcribed text:', text)

 speech/formatmp3.py

¶Ô±ÈÐÂÎÄ¼þ
@@ -0,0 +1,5 @@
from pydub import AudioSegment

# å è½½mp3æä»¶
mp3_audio = AudioSegment.from_mp3("input.mp3")


 speech/whisper2.py

¶Ô±ÈÐÂÎÄ¼þ
@@ -0,0 +1 @@
from faster import WhisperModel

 speech/whisperTest.py

¶Ô±ÈÐÂÎÄ¼þ
@@ -0,0 +1,10 @@
import whisper

# Load the "medium" checkpoint and transcribe the sample recording in one go.
transcription = whisper.load_model("medium").transcribe("audio/input.wav")

# Only the recognised text of the result is printed.
print(transcription["text"])

# tiny 72.1M æè¯´é®å¤§å®¶è¿ä¸ªè¯éç»æ¾å¨åå°äºæ¯å¡ç½åå°æ¯åå¯¹å¥½çè°¢è°¢
# base 1XXM  æå¸å¥é£ä¸ªååç»ä»æ¾å¨åå°äºæ¯7å·äºåå°æ¯å§å¯¹å¥½çè°¢è°¢å¥½
# small 461M  åä½ å¥½å¦æé é¢¨çä¸åç«å°çµ¦æ¾å¨åå°äºæ¯7èæ¨åå°æ¯å§å°å¥½çè¬è¬
# medium 1.42G  åä½ å¥½ä½ å¥½æèº«åé£å¿æä¸ªæ³å¾ç»æ¾å¨åå°äºæ¯7å·æ¥¼åå°æ¯å§å¯¹å¥½çè°¢è°¢å¥½

¶Ô±ÈÐÂÎÄ¼þ
			@@ -0,0 +1,101 @@
			import cv2
			import time
			import numpy as np
			import onnxruntime
			from scipy.special import softmax

			# å è½½ONNXæ¨¡å
			session = onnxruntime.InferenceSession("model/classify/s.onnx")
			# æåå¤´ç´¢å¼å·ï¼éå¸¸ä¸º0è¡¨ç¤ºç¬¬ä¸ä¸ªæåå¤´
			camera_index = 0

			# æå¼æåå¤´
			cap = cv2.VideoCapture(camera_index, cv2.CAP_DSHOW)
			# è®¾ç½®åè¾¨ç
			cap.set(cv2.CAP_PROP_FRAME_WIDTH, 3840) # å®½åº¦
			cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 2160) # é«åº¦
			# æ£æ¥æåå¤´æ¯å¦æåæå¼
			if not cap.isOpened():
			print("æ æ³æå¼æåå¤´")
			exit()

			width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
			height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
			print("æåå¤´åè¾¨ç:", width, "x", height)



			# ä»res.jsonä¸è¯»åç±»å«
			with open("res1-2.json", "r") as f:
			classes = eval(f.read())



			# ç®æ å¾åå°ºå¯¸
			target_width = 1024
			target_height = 768

			# è®¡æ¶å¨
			start_time = time.time()

			# å¾ªç¯è¯»åæåå¤´ç»é¢
			while True:
			ret, frame = cap.read()

			if not ret:
			print("æ æ³è¯»åæåå¤´ç»é¢")
			break

			# 19201080çå¾åï¼ä¸å¿è£åª640480çåºå
			cropped_frame = frame[int(height / 2 - target_height / 2):int(height / 2 + target_height / 2),
			int(width / 2 - target_width / 2):int(width / 2 + target_width / 2)]
			# è°æ´å¾åå°ºå¯¸
			resized_frame = cv2.resize(cropped_frame, (target_width, target_height))

			# è·åå½åæ¶é´
			current_time = time.time()

			#å¦æè·ç¦»ä¸ä¸æ¬¡ä¿åå·²ç»è¿å»1ç§ï¼åä¿åå½åç»é¢
			# if current_time - start_time >= 3.0:
			# # çæä¿åæä»¶åï¼ä»¥å½åæ¶é´å½å
			# save_name = time.strftime("%Y%m%d%H%M%S", time.localtime()) + ".jpg"
			# # ä¿åè°æ´å°ºå¯¸åçå¾ç
			# cv2.imwrite(save_path + save_name, frame)
			# print("ä¿åå¾ç:", save_name)
			# # éç½®è®¡æ¶å¨
			# start_time = time.time()

			# é¢å¤ç
			blob = cv2.dnn.blobFromImage(resized_frame, 1 / 255.0, (640, 640), swapRB=True, crop=False)

			# æ¨¡åæ¨ç
			outputs = session.run(None, {session.get_inputs()[0].name: blob})

			# print(outputs)
			# åºç¨softmaxå½æ°
			probabilities = outputs[0]

			# æ¾å°æå¤§æ¦ççç±»å«
			predicted_class = np.argmax(probabilities, axis=1)[0]
			max_probability = np.max(probabilities, axis=1)[0]

			# æ¾å°æ¦çè¾é«çååä¸ªç±»å«
			top_ten_classes = np.argsort(probabilities, axis=1)[0][-5:]

			# è¾åºååä¸ªç±»å«
			print("Top 5 Classes:")
			for i in top_ten_classes:
			print(f"{classes[i]}: {probabilities[0][i]}")

			# æ¾ç¤ºç»é¢
			cv2.imshow("Camera", resized_frame)

			# æ£æµæé®ï¼å¦ææä¸qé®åéåºå¾ªç¯
			if cv2.waitKey(1) & 0xFF == ord('q'):
			break

			# å³éæåå¤´
			cap.release()

			# å³éææçªå£
			cv2.destroyAllWindows()

¶Ô±ÈÐÂÎÄ¼þ
			@@ -0,0 +1,220 @@
			import time
			import cv2
			import numpy as np
			import onnxruntime


			class YOLOv8:
			    """YOLOv8 detector that runs an ONNX model through onnxruntime.

			    Calling an instance with a BGR image returns ``(boxes, scores, class_ids)``.
			    Boxes are ``[x1, y1, x2, y2]`` rows scaled back to the coordinates of the
			    image that was passed in.
			    """

			    def __init__(self, path, conf_thres=0.7, iou_thres=0.7):
			        """Store the thresholds and load the ONNX model found at ``path``.

			        Args:
			            path: Path of the ONNX model file.
			            conf_thres: Predictions whose best class score is not above this
			                value are discarded.
			            iou_thres: IoU at or above which a lower-scored box is suppressed.
			        """
			        self.conf_threshold = conf_thres
			        self.iou_threshold = iou_thres

			        # Initialize model
			        self.initialize_model(path)

			    def __call__(self, image):
			        """Shortcut for :meth:`detect_objects`."""
			        return self.detect_objects(image)

			    def initialize_model(self, path):
			        """Create the inference session and cache its input/output metadata."""
			        # CUDA is listed first and CPU second in the providers argument.
			        self.session = onnxruntime.InferenceSession(path, providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
			        # Get model info
			        self.get_input_details()
			        self.get_output_details()

			    def detect_objects(self, image):
			        """Run preprocessing, inference and postprocessing on one image.

			        The result is also stored on ``self.boxes``, ``self.scores`` and
			        ``self.class_ids``.
			        """
			        input_tensor, ratio = self.prepare_input(image)

			        # Perform inference on the image
			        outputs = self.inference(input_tensor)

			        self.boxes, self.scores, self.class_ids = self.process_output(outputs, ratio)

			        return self.boxes, self.scores, self.class_ids

			    def prepare_input(self, image):
			        """Convert a BGR image into a float32 tensor of shape (1, 3, H, W).

			        Returns:
			            ``(input_tensor, ratio)`` where ``ratio`` maps model-input
			            coordinates back to coordinates of ``image``.
			        """
			        self.img_height, self.img_width = image.shape[:2]

			        input_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

			        # Do not resize directly: scale with the aspect ratio preserved and
			        # fill the remaining area with a solid colour.
			        input_img, ratio = self.ratioresize(input_img)

			        # Scale input pixel values to 0 to 1
			        input_img = input_img / 255.0
			        input_img = input_img.transpose(2, 0, 1)
			        input_tensor = input_img[np.newaxis, :, :, :].astype(np.float32)

			        return input_tensor, ratio

			    def inference(self, input_tensor):
			        """Run the session on ``input_tensor`` and return all model outputs."""
			        return self.session.run(self.output_names, {self.input_names[0]: input_tensor})

			    def process_output(self, output, ratio):
			        """Turn the raw model output into final detections.

			        Args:
			            output: Sequence of model outputs; only ``output[0]`` is used.
			                It is squeezed and transposed so that each row holds
			                ``x, y, w, h`` followed by the per-class scores.
			            ratio: Factor that scales model-input coordinates back to the
			                original image.

			        Returns:
			            ``(boxes, scores, class_ids)`` as NumPy arrays, or three empty
			            lists when nothing passes the confidence threshold.
			        """
			        # atleast_2d keeps the single-prediction case (which squeezes down to
			        # one dimension) indexable as one row.
			        predictions = np.atleast_2d(np.squeeze(output[0]).T)

			        # Filter out object confidence scores below threshold
			        scores = np.max(predictions[:, 4:], axis=1)
			        confident = scores > self.conf_threshold
			        predictions = predictions[confident, :]
			        scores = scores[confident]

			        if len(scores) == 0:
			            return [], [], []

			        # Get the class with the highest confidence
			        class_ids = np.argmax(predictions[:, 4:], axis=1)

			        # Get bounding boxes for each object
			        boxes = self.extract_boxes(predictions, ratio)

			        # Apply non-maxima suppression to suppress weak, overlapping bounding boxes
			        indices = self.nms(boxes, scores, self.iou_threshold)

			        return boxes[indices], scores[indices], class_ids[indices]

			    def extract_boxes(self, predictions, ratio):
			        """Return the boxes of ``predictions`` as xyxy in original-image scale."""
			        # Multiplying into a new array leaves the caller's predictions intact
			        # (an in-place ``*=`` on the slice would modify them).
			        boxes = predictions[:, :4] * ratio

			        # Convert boxes to xyxy format
			        return self.xywh2xyxy(boxes)

			    def rescale_boxes(self, boxes):
			        """Rescale boxes from model-input size to original image size.

			        Not called by ``extract_boxes``, which multiplies by the letterbox
			        ratio instead.
			        """
			        input_shape = np.array([self.input_width, self.input_height, self.input_width, self.input_height])
			        boxes = np.divide(boxes, input_shape, dtype=np.float32)
			        boxes *= np.array([self.img_width, self.img_height, self.img_width, self.img_height])

			        return boxes

			    def get_input_details(self):
			        """Cache input names and the input height/width reported by the model."""
			        model_inputs = self.session.get_inputs()
			        self.input_names = [model_input.name for model_input in model_inputs]

			        # Indices 2 and 3 are read as height and width, matching the
			        # (1, 3, H, W) tensor built in prepare_input.
			        self.input_shape = model_inputs[0].shape
			        self.input_height = self.input_shape[2]
			        self.input_width = self.input_shape[3]

			    def get_output_details(self):
			        """Cache the output names of the model."""
			        model_outputs = self.session.get_outputs()
			        self.output_names = [model_output.name for model_output in model_outputs]

			    # Aspect-ratio preserving resize
			    def ratioresize(self, im, color=114):
			        """Resize ``im`` to fit the model input and pad the rest with ``color``.

			        The resized image is placed in the top-left corner of the padded
			        canvas, so coordinates need scaling only (no offset).

			        Returns:
			            ``(padded_img, 1 / r)`` where ``r`` is the scale that was applied.
			        """
			        shape = im.shape[:2]
			        new_h, new_w = self.input_height, self.input_width
			        padded_img = np.full((new_h, new_w, 3), color, dtype=np.uint8)

			        # Scale ratio (new / old)
			        r = min(new_h / shape[0], new_w / shape[1])

			        # Size of the resized image as (width, height)
			        new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))

			        if shape[::-1] != new_unpad:
			            im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)

			        padded_img[: new_unpad[1], : new_unpad[0]] = im
			        padded_img = np.ascontiguousarray(padded_img)
			        return padded_img, 1 / r

			    def nms(self, boxes, scores, iou_threshold):
			        """Greedy non-maximum suppression.

			        Returns:
			            List of indices into ``boxes`` that are kept, best score first.
			        """
			        # Sort by score, highest first
			        sorted_indices = np.argsort(scores)[::-1]

			        keep_boxes = []
			        while sorted_indices.size > 0:
			            # Pick the remaining box with the highest score
			            box_id = sorted_indices[0]
			            keep_boxes.append(box_id)

			            # Compute IoU of the picked box with the rest
			            ious = self.compute_iou(boxes[box_id, :], boxes[sorted_indices[1:], :])

			            # Keep only boxes whose IoU with the picked box is below the threshold
			            keep_indices = np.where(ious < iou_threshold)[0]

			            # +1 because ious was computed against sorted_indices[1:]
			            sorted_indices = sorted_indices[keep_indices + 1]

			        return keep_boxes

			    def compute_iou(self, box, boxes):
			        """Return the IoU of one xyxy ``box`` with every row of ``boxes``.

			        Pairs whose union area is zero get an IoU of 0 instead of NaN.
			        """
			        # Compute xmin, ymin, xmax, ymax of the intersections
			        xmin = np.maximum(box[0], boxes[:, 0])
			        ymin = np.maximum(box[1], boxes[:, 1])
			        xmax = np.minimum(box[2], boxes[:, 2])
			        ymax = np.minimum(box[3], boxes[:, 3])

			        # Compute intersection area
			        intersection_area = np.maximum(0, xmax - xmin) * np.maximum(0, ymax - ymin)

			        # Compute union area
			        box_area = (box[2] - box[0]) * (box[3] - box[1])
			        boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
			        union_area = box_area + boxes_area - intersection_area

			        # Compute IoU, leaving 0 where the union is empty
			        iou = np.zeros(union_area.shape, dtype=float)
			        np.divide(intersection_area, union_area, out=iou, where=union_area > 0)

			        return iou

			    def xywh2xyxy(self, x):
			        """Convert boxes from (centre x, centre y, w, h) to (x1, y1, x2, y2)."""
			        y = np.copy(x)
			        y[..., 0] = x[..., 0] - x[..., 2] / 2
			        y[..., 1] = x[..., 1] - x[..., 3] / 2
			        y[..., 2] = x[..., 0] + x[..., 2] / 2
			        y[..., 3] = x[..., 1] + x[..., 3] / 2
			        return y


			if __name__ == "__main__":
			yolov8_detector = YOLOv8('model/detect/best.onnx', conf_thres=0.7, iou_thres=0.7)


			# æåå¤´ç´¢å¼å·ï¼éå¸¸ä¸º0è¡¨ç¤ºç¬¬ä¸ä¸ªæåå¤´
			camera_index = 0

			# æå¼æåå¤´
			cap = cv2.VideoCapture(camera_index, cv2.CAP_DSHOW)
			# è®¾ç½®åè¾¨ç
			cap.set(cv2.CAP_PROP_FRAME_WIDTH, 3840) # å®½åº¦
			cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 2160) # é«åº¦
			# æ£æ¥æåå¤´æ¯å¦æåæå¼
			if not cap.isOpened():
			print("æ æ³æå¼æåå¤´")
			exit()
			width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
			height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
			print("æåå¤´åè¾¨ç:", width, "x", height)
			# ç®æ å¾åå°ºå¯¸
			target_width = 1024
			target_height = 768
			# å¾ªç¯è¯»åæåå¤´ç»é¢
			while True:
			ret, frame = cap.read()

			if not ret:
			print("æ æ³è¯»åæåå¤´ç»é¢")
			break

			# 19201080çå¾åï¼ä¸å¿è£åª640480çåºå
			cropped_frame = frame[int(height / 2 - target_height / 2):int(height / 2 + target_height / 2),
			int(width / 2 - target_width / 2):int(width / 2 + target_width / 2)]
			# è°æ´å¾åå°ºå¯¸
			resized_frame = cv2.resize(cropped_frame, (target_width, target_height))
			boxes, scores, class_ids = yolov8_detector(resized_frame)
			print(boxes, scores, class_ids)

¶Ô±ÈÐÂÎÄ¼þ
			@@ -0,0 +1,50 @@
			import ast

			import cv2
			import numpy as np
			import onnxruntime
			from scipy.special import softmax

			# Load the ONNX model
			session = onnxruntime.InferenceSession("model/classify/best.onnx")

			# Read the image
			image_path = "D:\\temp\\15.jpg"
			img = cv2.imread(image_path)
			if img is None:
			    # cv2.imread reports an unreadable file by returning None; fail here
			    # with a clear message instead of inside blobFromImage.
			    raise FileNotFoundError(image_path)

			# Read the class names.  The file is parsed as a Python literal, which does
			# not execute code the way eval() did.
			with open("res1-2.json", "r") as f:
			    classes = ast.literal_eval(f.read())

			# Preprocess: scale to [0, 1], resize to 640x640 and swap BGR -> RGB
			blob = cv2.dnn.blobFromImage(img, 1/255.0, (640, 640), swapRB=True, crop=False)

			# Model inference
			outputs = session.run(None, {session.get_inputs()[0].name: blob})

			# print(outputs)
			# The first model output is used directly as the class scores; no softmax
			# is applied here.
			probabilities = outputs[0]

			# Class with the highest score
			predicted_class = np.argmax(probabilities, axis=1)[0]
			max_probability = np.max(probabilities, axis=1)[0]

			# Indices of the five highest-scoring classes, in ascending score order
			top_five_classes = np.argsort(probabilities, axis=1)[0][-5:]

			# Print those five classes (the header previously claimed ten)
			print("Top 5 Classes:")
			for i in top_five_classes:
			    print(f"{classes[i]}: {probabilities[0][i]}")



			# åå¤ç
			# for detection in outputs[0][0]:
			# confidence = detection[4]
			# if confidence > 0.5:
			# class_id = int(detection[5])
			# x, y, w, h = detection[:4]
			# cv2.rectangle(img, (int(x), int(y)), (int(x+w), int(y+h)), (0, 255, 0), 2)
			#
			# # æ¾ç¤ºç»æ
			# cv2.imshow("YOLOv8 Detection", img)
			# cv2.waitKey(0)

¶Ô±ÈÐÂÎÄ¼þ
			@@ -0,0 +1,364 @@
			import time

			import cv2
			import onnxruntime as ort
			from PIL import Image
			import numpy as np

			# ç½®ä¿¡åº¦
			confidence_thres = 0.35
			# iouéå¼
			iou_thres = 0.5
			# ç±»å«

			classes = {0: 'herb'}
			# éæºé¢è²
			color_palette = np.random.uniform(100, 255, size=(len(classes), 3))

			# å¤ææ¯ä½¿ç¨GPUæCPU
			providers = [
			('CUDAExecutionProvider', {
			'device_id': 0, # å¯ä»¥éæ©GPUè®¾å¤IDï¼å¦æä½ æå¤ä¸ªGPU
			}),
			'CPUExecutionProvider', # ä¹å¯ä»¥è®¾ç½®CPUä½ä¸ºå¤é
			]

			def calculate_iou(box, other_boxes):
			    """
			    Compute the intersection-over-union (IoU) of one box with a set of boxes.

			    Args:
			        box: A single bounding box in the form [x1, y1, width, height].
			        other_boxes: Sequence or array of boxes, each [x1, y1, width, height].

			    Returns:
			        iou: Array with the IoU of ``box`` against every entry of
			        ``other_boxes``.  Pairs whose union area is zero yield 0 rather
			        than NaN.
			    """
			    # Convert once instead of once per coordinate.
			    others = np.asarray(other_boxes)

			    # Top-left corner of the intersection
			    x1 = np.maximum(box[0], others[:, 0])
			    y1 = np.maximum(box[1], others[:, 1])
			    # Bottom-right corner of the intersection
			    x2 = np.minimum(box[0] + box[2], others[:, 0] + others[:, 2])
			    y2 = np.minimum(box[1] + box[3], others[:, 1] + others[:, 3])
			    # Area of the intersection (0 when the boxes do not overlap)
			    intersection_area = np.maximum(0, x2 - x1) * np.maximum(0, y2 - y1)
			    # Area of the given box and of each other box
			    box_area = box[2] * box[3]
			    other_boxes_area = others[:, 2] * others[:, 3]
			    union_area = box_area + other_boxes_area - intersection_area
			    # IoU; entries with an empty union keep their initial 0
			    iou = np.zeros(union_area.shape, dtype=float)
			    np.divide(intersection_area, union_area, out=iou, where=union_area > 0)
			    return iou

			def custom_NMSBoxes(boxes, scores, confidence_threshold, iou_threshold):
			    """Greedy non-maximum suppression over [x1, y1, width, height] boxes.

			    Args:
			        boxes: Sequence of boxes, each [x1, y1, width, height].
			        scores: Score of each box, same length as ``boxes``.
			        confidence_threshold: Boxes whose score is not strictly greater
			            than this value are ignored.
			        iou_threshold: A box is dropped when its IoU with an already
			            selected box exceeds this value.

			    Returns:
			        List of indices into the ``boxes`` argument for the selected boxes,
			        best score first.  Earlier the indices referred to the internally
			        filtered arrays, which pointed at the wrong boxes whenever the
			        confidence filter removed an entry.
			    """
			    # No boxes at all: nothing to select
			    if len(boxes) == 0:
			        return []
			    # Convert scores and boxes to NumPy arrays
			    scores = np.array(scores)
			    boxes = np.array(boxes)
			    # Filter by confidence, remembering where each survivor came from
			    mask = scores > confidence_threshold
			    original_indices = np.flatnonzero(mask)
			    filtered_boxes = boxes[mask]
			    filtered_scores = scores[mask]
			    # Nothing left after filtering
			    if len(filtered_boxes) == 0:
			        return []
			    # Order the remaining boxes by score, highest first
			    sorted_indices = np.argsort(filtered_scores)[::-1]
			    # Selected box indices (positions in the caller's ``boxes``)
			    indices = []
			    # Loop while unprocessed boxes remain
			    while len(sorted_indices) > 0:
			        # Take the highest-scoring remaining box
			        current_index = sorted_indices[0]
			        indices.append(original_indices[current_index])
			        # Only one box left: done
			        if len(sorted_indices) == 1:
			            break
			        # Compare the current box with all remaining ones
			        current_box = filtered_boxes[current_index]
			        other_boxes = filtered_boxes[sorted_indices[1:]]
			        iou = calculate_iou(current_box, other_boxes)
			        # Keep the boxes that do not overlap the current one too much
			        non_overlapping_indices = np.where(iou <= iou_threshold)[0]
			        # +1 because iou was computed against sorted_indices[1:]
			        sorted_indices = sorted_indices[non_overlapping_indices + 1]
			    # Return the selected indices
			    return indices


			def draw_detections(img, box, score, class_id):
			    """
			    Draw one detection (box outline plus a filled label tag) onto ``img``.

			    Args:
			        img: Image to draw on; it is modified in place.
			        box: Detected box as (x1, y1, width, height).
			        score: Detection score, shown with two decimals in the label.
			        class_id: Class ID, used to look up the colour and the class name.

			    Returns:
			        None
			    """
			    left, top, box_w, box_h = box
			    # Colour assigned to this class
			    colour = color_palette[class_id]

			    # Box outline, 2 px thick
			    top_left = (int(left), int(top))
			    bottom_right = (int(left + box_w), int(top + box_h))
			    cv2.rectangle(img, top_left, bottom_right, colour, 2)

			    # Label text: class name and score
			    caption = f'{classes[class_id]}: {score:.2f}'
			    (text_w, text_h), _ = cv2.getTextSize(caption, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)

			    # Put the label above the box when there is room, otherwise below its top edge
			    text_x = left
			    if top - 10 > text_h:
			        text_y = top - 10
			    else:
			        text_y = top + 10

			    # Filled rectangle as label background, then the text itself in black
			    cv2.rectangle(img, (text_x, text_y - text_h), (text_x + text_w, text_y + text_h), colour, cv2.FILLED)
			    cv2.putText(img, caption, (text_x, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)


			def preprocess(img, input_width, input_height):
			    """
			    Prepare an image for inference.

			    Args:
			        img: Input image; it is converted with COLOR_BGR2RGB.
			        input_width: Width the image is resized to.
			        input_height: Height the image is resized to.

			    Returns:
			        (image_data, img_height, img_width): the float32 tensor of shape
			        (1, channels, input_height, input_width) with values divided by
			        255, and the height and width of the original image.
			    """
			    # Remember the original size before anything is resized
			    original_h, original_w = img.shape[:2]
			    # BGR -> RGB, then resize to the model input size
			    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
			    resized = cv2.resize(rgb, (input_width, input_height))
			    # Normalise to [0, 1]
			    scaled = np.array(resized) / 255.0
			    # Channels first, then add the batch dimension
			    channels_first = np.transpose(scaled, (2, 0, 1))
			    batch = np.expand_dims(channels_first, axis=0).astype(np.float32)
			    return batch, original_h, original_w

			def postprocess(input_image, output, input_width, input_height, img_width, img_height):
			    """
			    Decode the model output and draw the surviving detections.

			    Args:
			        input_image (numpy.ndarray): Image the detections are drawn on.
			        output: Model outputs; only ``output[0]`` is used.
			        input_width (int): Model input width.
			        input_height (int): Model input height.
			        img_width (int): Original image width.
			        img_height (int): Original image height.

			    Returns:
			        numpy.ndarray: ``input_image`` with the detections drawn on it.
			    """
			    # One row per prediction: x, y, w, h followed by the class scores
			    predictions = np.transpose(np.squeeze(output[0]))

			    # Factors that map model-input coordinates to original-image coordinates
			    scale_x = img_width / input_width
			    scale_y = img_height / input_height

			    boxes, scores, class_ids = [], [], []
			    for row in predictions:
			        class_scores = row[4:]
			        best_score = np.amax(class_scores)
			        # Skip predictions whose best class score is below the confidence threshold
			        if not best_score >= confidence_thres:
			            continue
			        best_class = np.argmax(class_scores)
			        cx, cy, w, h = row[0], row[1], row[2], row[3]
			        # Centre/size -> top-left/size, scaled to the original image
			        scaled_box = [
			            int((cx - w / 2) * scale_x),
			            int((cy - h / 2) * scale_y),
			            int(w * scale_x),
			            int(h * scale_y),
			        ]
			        class_ids.append(best_class)
			        scores.append(best_score)
			        boxes.append(scaled_box)

			    # Non-maximum suppression, then draw every box that is kept
			    for keep in custom_NMSBoxes(boxes, scores, confidence_thres, iou_thres):
			        draw_detections(input_image, boxes[keep], scores[keep], class_ids[keep])

			    return input_image

			def init_detect_model(model_path):
			    """Create the ONNX inference session and read the model input size.

			    Args:
			        model_path: Path of the ONNX model file.

			    Returns:
			        (session, model_inputs, input_width, input_height)
			    """
			    # Create the inference session with the configured execution providers
			    session = ort.InferenceSession(model_path, providers=providers)
			    # Input metadata of the model
			    model_inputs = session.get_inputs()
			    input_shape = model_inputs[0].shape
			    # preprocess() feeds a (batch, channels, height, width) tensor, so
			    # index 2 is the height and index 3 the width.  They used to be read
			    # the other way round, which only worked for square inputs.
			    input_height = input_shape[2]
			    input_width = input_shape[3]
			    # Return order is unchanged: width before height
			    return session, model_inputs, input_width, input_height

			def detect_object(image, session, model_inputs, input_width, input_height):
			    """Run detection on one image and return it with the detections drawn.

			    Args:
			        image: A PIL image (converted with ``np.array``) or a NumPy array
			            used as is.
			        session: Inference session used to run the model.
			        model_inputs: Model input metadata; the first input's name is used.
			        input_width: Model input width.
			        input_height: Model input height.

			    Returns:
			        The image array with detections drawn on it.
			    """
			    # NOTE(review): preprocess() applies COLOR_BGR2RGB, while a PIL image
			    # converted with np.array is presumably already RGB - confirm whether
			    # PIL inputs end up with swapped channels.
			    result_image = np.array(image) if isinstance(image, Image.Image) else image
			    # Resize / normalise the image and remember its original size
			    img_data, img_height, img_width = preprocess(result_image, input_width, input_height)
			    # Run inference on the preprocessed tensor
			    outputs = session.run(None, {model_inputs[0].name: img_data})
			    # Decode the output, filter the boxes and draw them
			    return postprocess(result_image, outputs, input_width, input_height, img_width, img_height)
			if __name__ == '__main__':
			# æ¨¡åæä»¶çè·¯å¾
			model_path = 'model/detect/best.onnx'
			# åå§åæ£æµæ¨¡åï¼å è½½æ¨¡åå¹¶è·åæ¨¡åè¾å¥èç¹ä¿¡æ¯åè¾å¥å¾åçå®½åº¦ãé«åº¦
			session, model_inputs, input_width, input_height = init_detect_model(model_path)
			# ä¸ç§æ¨¡å¼ 1ä¸ºå¾çé¢æµï¼å¹¶æ¾ç¤ºç»æå¾çï¼2ä¸ºæåå¤´æ£æµï¼å¹¶å®æ¶æ¾ç¤ºFPSï¼ 3ä¸ºè§é¢æ£æµï¼å¹¶ä¿åç»æè§é¢
			mode = 2
			if mode == 1:
			# è¯»åå¾åæä»¶
			image_data = cv2.imread("street.jpg")
			# ä½¿ç¨æ£æµæ¨¡åå¯¹è¯»å¥çå¾åè¿è¡å¯¹è±¡æ£æµ
			result_image = detect_object(image_data, session, model_inputs, input_width, input_height)
			# å°æ£æµåçå¾åä¿åå°æä»¶
			cv2.imwrite("output_image.jpg", result_image)
			# å¨çªå£ä¸æ¾ç¤ºæ£æµåçå¾å
			cv2.imshow('Output', result_image)
			# çå¾ç¨æ·æé®ï¼ç¶åå³éæ¾ç¤ºçªå£
			cv2.waitKey(0)
			elif mode == 2:
			# æåå¤´ç´¢å¼å·ï¼éå¸¸ä¸º0è¡¨ç¤ºç¬¬ä¸ä¸ªæåå¤´
			camera_index = 0

			# æå¼æåå¤´
			cap = cv2.VideoCapture(camera_index, cv2.CAP_DSHOW)
			# è®¾ç½®åè¾¨ç
			cap.set(cv2.CAP_PROP_FRAME_WIDTH, 3840) # å®½åº¦
			cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 2160) # é«åº¦
			# æ£æ¥æåå¤´æ¯å¦æåæå¼
			if not cap.isOpened():
			print("æ æ³æå¼æåå¤´")
			exit()
			width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
			height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
			print("æåå¤´åè¾¨ç:", width, "x", height)


			# åå§åå¸§æ°è®¡æ°å¨åèµ·å§æ¶é´
			frame_count = 0
			start_time = time.time()

			# ç®æ å¾åå°ºå¯¸
			target_width = 1024
			target_height = 768



			# å¾ªç¯è¯»åæåå¤´è§é¢æµ
			while True:
			# è¯»åä¸å¸§
			ret, frame = cap.read()
			# æ£æ¥å¸§æ¯å¦æåè¯»å
			if not ret:
			print("Error: Could not read frame.")
			break
			# 19201080çå¾åï¼ä¸å¿è£åª640480çåºå
			cropped_frame = frame[int(height / 2 - target_height / 2):int(height / 2 + target_height / 2),
			int(width / 2 - target_width / 2):int(width / 2 + target_width / 2)]
			# è°æ´å¾åå°ºå¯¸
			resized_frame = cv2.resize(cropped_frame, (target_width, target_height))

			# ä½¿ç¨æ£æµæ¨¡åå¯¹è¯»å¥çå¸§è¿è¡å¯¹è±¡æ£æµ
			output_image = detect_object(resized_frame, session, model_inputs, input_width, input_height)
			# è®¡ç®å¸§éç
			frame_count += 1
			end_time = time.time()
			elapsed_time = end_time - start_time
			fps = frame_count / elapsed_time
			print(f"FPS: {fps:.2f}")
			# å°FPSç»å¶å¨å¾åä¸
			cv2.putText(output_image, f"FPS: {fps:.2f}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
			# å¨çªå£ä¸æ¾ç¤ºå½åå¸§
			cv2.imshow("Video", output_image)
			# æä¸ 'q' é®éåºå¾ªç¯
			if cv2.waitKey(1) & 0xFF == ord('q'):
			break
			# éæ¾æåå¤´èµæº
			cap.release()
			# å³éçªå£
			cv2.destroyAllWindows()
			elif mode == 3:
			# è¾å¥è§é¢è·¯å¾
			input_video_path = 'kun.mp4'
			# è¾åºè§é¢è·¯å¾
			output_video_path = 'kun_det.mp4'
			# æå¼è§é¢æä»¶
			cap = cv2.VideoCapture(input_video_path)
			# æ£æ¥è§é¢æ¯å¦æåæå¼
			if not cap.isOpened():
			print("Error: Could not open video.")
			exit()
			# è¯»åè§é¢çåºæ¬ä¿¡æ¯
			frame_width = int(cap.get(3))
			frame_height = int(cap.get(4))
			fps = cap.get(cv2.CAP_PROP_FPS)
			# å®ä¹è§é¢ç¼ç å¨ååå»ºVideoWriterå¯¹è±¡
			fourcc = cv2.VideoWriter_fourcc(*'mp4v') # æ ¹æ®æä»¶ååç¼ä½¿ç¨åéçç¼ç å¨
			out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))
			# åå§åå¸§æ°è®¡æ°å¨åèµ·å§æ¶é´
			frame_count = 0
			start_time = time.time()
			while True:
			ret, frame = cap.read()
			if not ret:
			print("Info: End of video file.")
			break
			# å¯¹è¯»å¥çå¸§è¿è¡å¯¹è±¡æ£æµ
			output_image = detect_object(frame, session, model_inputs, input_width, input_height)
			# è®¡ç®å¹¶æå°å¸§éç
			frame_count += 1
			end_time = time.time()
			elapsed_time = end_time - start_time
			if elapsed_time > 0:
			fps = frame_count / elapsed_time
			print(f"FPS: {fps:.2f}")
			# å°å¤çåçå¸§åå¥è¾åºè§é¢
			out.write(output_image)
			#ï¼å¯éï¼å®æ¶æ¾ç¤ºå¤çåçè§é¢å¸§
			cv2.imshow("Output Video", output_image)
			if cv2.waitKey(1) & 0xFF == ord('q'):
			break
			# éæ¾èµæº
			cap.release()
			out.release()
			cv2.destroyAllWindows()
			else:
			print("è¾å¥éè¯¯ï¼è¯·æ£æ¥modeçèµå¼")

			@@ -1,13 +1,20 @@
			import concurrent.futures
			import os
			import time
			import requests
			import re


			def imgdata_set(save_path, word, epoch):
			if not os.path.exists(save_path):
			os.makedirs(save_path)
			else:
			return 0
			q = 0 # åæ¢ç¬åå¾çæ¡ä»¶
			a = 0 # å¾çåç§°
			while(True):
			time.sleep(1)
			print("å¼å§ç¬åå¾ç")
			url = "https://image.baidu.com/search/flip?tn=baiduimage&ie=utf-8&word={}&pn={}&ct=&ic=0&lm=-1&width=0&height=0".format(word, q)
			# word=éè¦æç´¢çåå
			headers = {
			@@ -16,21 +23,28 @@
			response = requests.get(url, headers=headers) # åéè¯·æ±è·åååº
			html = response.text # è·åååºçHTMLåå®¹
			urls = re.findall('"objURL":"(.*?)"', html) # ä½¿ç¨æ£åè¡¨è¾¾å¼æåå¾çURL
			for url in urls:
			try:
			print(a) # å¾ççåå
			response = requests.get(url, headers=headers) # åéè¯·æ±è·åå¾çååºE:\yaocai\juhua
			image = response.content # è·åå¾çåå®¹
			with open(os.path.join(save_path, "{}.jpg".format(a)), 'wb') as f: # å°å¾çåå®¹ä¿åå°æå®è·¯å¾
			f.write(image)
			a = a + 1
			except Exception as e:
			pass
			continue
			print(len(urls))
			# ä½¿ç¨concurrent.futureså®ç°å¹¶åä¸è½½
			with concurrent.futures.ThreadPoolExecutor( max_workers=10) as executor:
			# æäº¤ææä¸è½½ä»»å¡å¹¶æ¶éfutureå¯¹è±¡
			futures = [executor.submit(download_image, index, headers,save_path,url ) for index,url in enumerate(urls)]
			q = q + 20
			if (q / 20) >= int(epoch):
			break


			def download_image(a, headers, save_path, url):
			    """Download one image and save it as ``<a>.jpg`` inside ``save_path``.

			    Failures are reported on stdout and otherwise ignored, so one bad URL
			    does not stop the remaining downloads.

			    Args:
			        a: Name (index) of the image; used as the file name.
			        headers: HTTP headers sent with the request.
			        save_path: Directory the image is written to.
			        url: Address of the image.
			    """
			    try:
			        print(a)  # name of the image
			        # The timeout keeps a server that never answers from blocking the worker
			        response = requests.get(url, headers=headers, timeout=10)
			        # Do not store an HTTP error page as a .jpg file
			        response.raise_for_status()
			        image = response.content
			        with open(os.path.join(save_path, "{}.jpg".format(a)), 'wb') as f:
			            f.write(image)
			    except (requests.RequestException, OSError) as e:
			        print("Failed to download image {}: {}".format(a, e))


			if __name__ == "__main__":
			save_path = input('ä½ æ³ä¿åçè·¯å¾ï¼') # è¯¢é®ç¨æ·ä¿åè·¯å¾
			word = input('ä½ æ³è¦ä¸è½½ä»ä¹å¾çï¼è¯·è¾å¥:') # è¯¢é®ç¨æ·æç´¢å³é®è¯

¶Ô±ÈÐÂÎÄ¼þ
			@@ -0,0 +1,46 @@
			import mysql.connector
			from mysql.connector import Error
			from pachong import imgdata_set
			from quchong import quchongmethod
			import shutil


			# è¿æ¥mysql æ¥è¯¢æ°æ®åº
			def mysql_connect():
			    """Open a connection to the local ``herb`` MySQL database.

			    Returns:
			        The connection when it reports being connected; ``None`` when it
			        does not or when connecting raised an ``Error`` (which is printed).
			    """
			    # NOTE(review): the credentials are hard-coded in this function.
			    settings = {
			        'host': 'localhost',
			        'database': 'herb',
			        'user': 'root',
			        'password': '123456',
			    }
			    try:
			        connection = mysql.connector.connect(**settings)
			        if not connection.is_connected():
			            return None
			        print('Connected to MySQL database')
			        return connection
			    except Error as exc:
			        print(exc)
			    return None

			# æ¥è¯¢dry_herb_infoè¡¨
			def mysql_select(conn):
			    """Return (name, pinyin) for every dry_herb_info row with a non-empty pinyin.

			    Args:
			        conn: Open database connection providing ``cursor()``.

			    Returns:
			        The rows returned by ``fetchall()``.
			    """
			    sql = "SELECT name, pinyin FROM dry_herb_info where pinyin is not null and pinyin <> ''"
			    cursor = conn.cursor()
			    try:
			        cursor.execute(sql)
			        return cursor.fetchall()
			    finally:
			        # Release the cursor even when the query fails
			        cursor.close()


			# main
			if __name__ == '__main__':
			    # For every herb row: download images, split off duplicates, then
			    # delete the folder that received the duplicates.
			    conn = mysql_connect()
			    if conn is None:
			        # mysql_connect() yields None when the connection attempt fails
			        raise SystemExit('Could not connect to MySQL; nothing to download')

			    try:
			        result = mysql_select(conn)
			    finally:
			        # All rows are already fetched, so the connection can be released
			        conn.close()

			    for name, pinyin in result:
			        print(name)
			        print(pinyin)
			        download_dir = 'E:/pachong/' + pinyin
			        duplicates_dir = 'E:/pachong/2/' + pinyin
			        # Search keyword is the herb name followed by "饮片"
			        imgdata_set(download_dir, name + '饮片', 2)
			        quchongmethod(download_dir, duplicates_dir)
			        # Delete the temporary folder
			        shutil.rmtree(duplicates_dir)

对比新文件
			@@ -0,0 +1,35 @@
			import os
			import requests
			from bs4 import BeautifulSoup
			from PIL import Image
			from io import BytesIO


			def fetch_images(keyword, save_path):
			    """Search Baidu Images for ``keyword`` and save the results as JPEG files.

			    Each image found is stored as ``<keyword>_<index>.jpg`` in ``save_path``.
			    A failure on one image is printed and the loop continues.

			    Args:
			        keyword: Search term; also used as the file-name prefix.
			        save_path: Output directory, created when it does not exist.
			    """
			    os.makedirs(save_path, exist_ok=True)

			    headers = {
			        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
			    }
			    # Passing the query through ``params`` percent-encodes the keyword, so
			    # characters such as '&' or '#' cannot corrupt the query string.
			    response = requests.get(
			        "https://image.baidu.com/search/flip",
			        params={"tn": "baiduimage", "ie": "utf-8", "word": keyword},
			        headers=headers,
			        timeout=10,
			    )
			    soup = BeautifulSoup(response.text, 'html.parser')
			    # NOTE(review): html.parser lower-cases tag names, so this mixed-case
			    # lookup probably never matches anything - confirm against a live page.
			    img_tags = soup.find_all('ObjURL', class_='obj')

			    for i, img_tag in enumerate(img_tags):
			        try:
			            img_url = img_tag['data-src']
			            img_data = requests.get(img_url, timeout=10).content
			            img = Image.open(BytesIO(img_data))
			            img.save(os.path.join(save_path, f"{keyword}_{i}.jpg"))
			            print(f"Downloaded {keyword}_{i}.jpg")
			        except Exception as e:
			            print(f"Failed to download image: {e}")


			if __name__ == "__main__":
			    # Demo run: fetch images for one keyword into ./images.
			    # NOTE(review): the keyword was mis-encoded in this copy of the file;
			    # "人参" is the presumed original text - confirm against the repository.
			    keyword = "人参"
			    save_path = "images"
			    fetch_images(keyword, save_path)

			@@ -52,12 +52,9 @@
			result = "ä¸¤å¼ å¾ç¸å"
			return result

			if __name__ == '__main__':

			load_path = 'E:\yaocai\yinyanghuo' # è¦å»éçæä»¶å¤¹
			save_path = 'E:\yaocai\\2\\yinyanghuo' # ç©ºæä»¶å¤¹ï¼ç¨äºåå¨æ£æµå°çéå¤çç§ç
			def quchongmethod(load_path, save_path):
			os.makedirs(save_path, exist_ok=True)

			# è·åå¾çåè¡¨ file_mapï¼åå¸{æä»¶è·¯å¾filename : æä»¶å¤§å°image_size}
			file_map = {}
			image_size = 0
			@@ -70,13 +67,11 @@
			# print('the full name of the file is %s' % os.path.join(parent, filename))
			image_size = os.path.getsize(os.path.join(parent, filename))
			file_map.setdefault(os.path.join(parent, filename), image_size)

			# è·åçå¾çåè¡¨æ æä»¶å¤§å°image_size æåº
			file_map = sorted(file_map.items(), key=lambda d: d[1], reverse=False)
			file_list = []
			for filename, image_size in file_map:
			file_list.append(filename)

			# ååºéå¤çå¾ç
			file_repeat = []
			for currIndex, filename in enumerate(file_list):
			@@ -101,8 +96,14 @@
			currIndex += 1
			if currIndex >= (len(file_list)-2):
			break

			# å°éå¤çå¾çç§»å¨å°æ°çæä»¶å¤¹ï¼å®ç°å¯¹åæä»¶å¤¹éé
			for image in file_repeat:
			shutil.move(image, save_path)
			print("æ£å¨ç§»é¤éå¤ç§çï¼", image)


			if __name__ == '__main__':
			    # Raw strings keep the Windows backslashes literal; the previous
			    # '\y' was an invalid escape sequence in a normal string literal.
			    load_path = r'E:\yaocai\chenpi'  # folder to de-duplicate
			    save_path = r'E:\yaocai\2\chenpi'  # empty folder that receives the detected duplicates
			    quchongmethod(load_path, save_path)

对比新文件
			@@ -0,0 +1,38 @@
			import os

			def process_file(file_path):
			    """Increment the leading integer of every line of a text file in place.

			    Lines whose first whitespace-separated token is not an integer, and
			    blank lines, are written back unchanged. The new content is assembled
			    in memory first, so the file is only rewritten once every line has
			    been processed.

			    Args:
			        file_path: Path of the text file to rewrite.
			    """
			    with open(file_path, 'r') as file:
			        lines = file.readlines()

			    updated_lines = []
			    for line in lines:
			        body = line.lstrip()
			        tokens = body.split(None, 1)
			        # Try to read the first token of the line as an integer
			        try:
			            number = int(tokens[0])
			        except (IndexError, ValueError):
			            # Blank line or non-numeric first token: keep the line as is
			            updated_lines.append(line)
			            continue

			        # Swap only the first token, keeping the leading whitespace and
			        # the rest of the line exactly as they were
			        indent = line[:len(line) - len(body)]
			        remainder = body[len(tokens[0]):]
			        updated_lines.append(indent + str(number + 1) + remainder)

			    with open(file_path, 'w') as file:
			        file.writelines(updated_lines)

			if __name__ == "__main__":
			    # Directory holding the label files
			    target_directory = 'E:\\herb_scan.v1i.yolov8\\valid\\labels'

			    # Process every .txt file found directly in that directory
			    for entry in os.listdir(target_directory):
			        if entry.endswith('.txt'):
			            process_file(os.path.join(target_directory, entry))

			    print("处理完成！")

			@@ -6,12 +6,17 @@

			# æå¼æåå¤´
			cap = cv2.VideoCapture(camera_index)

			# è®¾ç½®åè¾¨ç
			# cap.set(cv2.CAP_PROP_FRAME_WIDTH, 3840) # å®½åº¦
			# cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 2160) # é«åº¦
			# æ£æ¥æåå¤´æ¯å¦æåæå¼
			if not cap.isOpened():
			print("æ æ³æå¼æåå¤´")
			exit()

			width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
			height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
			print("æåå¤´åè¾¨ç:", width, "x", height)
			# å¾çä¿åè·¯å¾
			save_path = "captured_images/"

			@@ -36,6 +41,8 @@
			print("æ æ³è¯»åæåå¤´ç»é¢")
			break

			# è£åªå¾å
			# cropped_frame = frame[750:1230, 1650:2290]
			# è°æ´å¾åå°ºå¯¸
			resized_frame = cv2.resize(frame, (target_width, target_height))

			@@ -43,20 +50,20 @@
			current_time = time.time()

			# å¦æè·ç¦»ä¸ä¸æ¬¡ä¿åå·²ç»è¿å»1ç§ï¼åä¿åå½åç»é¢
			if current_time - start_time >= 1.0:
			# çæä¿åæä»¶åï¼ä»¥å½åæ¶é´å½å
			save_name = time.strftime("%Y%m%d%H%M%S", time.localtime()) + ".jpg"
			# ä¿åè°æ´å°ºå¯¸åçå¾ç
			cv2.imwrite(save_path + save_name, resized_frame)
			print("ä¿åå¾ç:", save_name)
			# éç½®è®¡æ¶å¨
			start_time = time.time()
			# if current_time - start_time >= 3.0:
			# # çæä¿åæä»¶åï¼ä»¥å½åæ¶é´å½å
			# save_name = time.strftime("%Y%m%d%H%M%S", time.localtime()) + ".jpg"
			# # ä¿åè°æ´å°ºå¯¸åçå¾ç
			# cv2.imwrite(save_path + save_name, frame)
			# print("ä¿åå¾ç:", save_name)
			# # éç½®è®¡æ¶å¨
			# start_time = time.time()

			# æ¾ç¤ºç»é¢
			cv2.imshow("Camera", resized_frame)
			cv2.imshow("Camera", frame)

			# æ£æµæé®ï¼å¦ææä¸qé®åéåºå¾ªç¯
			if cv2.waitKey(1000) & 0xFF == ord('q'):
			if cv2.waitKey(1) & 0xFF == ord('q'):
			break

			# å³éæåå¤´

对比新文件
			@@ -0,0 +1,105 @@
			import cv2
			import time
			import numpy as np
			import onnxruntime
			from scipy.special import softmax

			# å è½½ONNXæ¨¡å
			session = onnxruntime.InferenceSession("model/detect/best.onnx")
			# Camera index; 0 is normally the first attached camera
			camera_index = 0

			# Open the camera
			cap = cv2.VideoCapture(camera_index, cv2.CAP_DSHOW)
			# Request a 3840x2160 capture size; the values in effect are read back below
			cap.set(cv2.CAP_PROP_FRAME_WIDTH, 3840)  # width
			cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 2160)  # height
			# Stop early when the camera could not be opened
			if not cap.isOpened():
			    print("无法打开摄像头")
			    raise SystemExit(1)

			width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
			height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
			print("摄像头分辨率:", width, "x", height)

			# Size of the centre crop taken from every frame
			target_width = 1024
			target_height = 768

			# Minimum class score for a candidate to be reported
			score_threshold = 0.5
			input_name = session.get_inputs()[0].name

			try:
			    # Read frames until the camera fails or 'q' is pressed
			    while True:
			        ret, frame = cap.read()

			        if not ret:
			            print("无法读取摄像头画面")
			            break

			        # Centre crop of target_width x target_height; the offsets are
			        # clamped at 0 so a frame smaller than the crop is used whole
			        # instead of being sliced with negative indices.
			        frame_height, frame_width = frame.shape[:2]
			        top = max(0, (frame_height - target_height) // 2)
			        left = max(0, (frame_width - target_width) // 2)
			        cropped_frame = frame[top:top + target_height, left:left + target_width]
			        # Bring the crop to the target size
			        resized_frame = cv2.resize(cropped_frame, (target_width, target_height))

			        # Pre-processing: scale to [0, 1], resize to 640x640, swap R and B
			        blob = cv2.dnn.blobFromImage(resized_frame, 1 / 255.0, (640, 640), swapRB=True, crop=False)

			        # Model inference
			        outputs = session.run(None, {input_name: blob})

			        # After squeeze + transpose each row is one candidate; columns
			        # from index 4 onward are read as per-class scores.
			        # NOTE(review): assumes a (1, 4 + num_classes, num_candidates)
			        # output layout - confirm against the exported model.
			        output = np.transpose(np.squeeze(outputs[0]))
			        class_scores = output[:, 4:]
			        best_scores = class_scores.max(axis=1)
			        keep = best_scores > score_threshold
			        scores = best_scores[keep].tolist()
			        class_ids = class_scores[keep].argmax(axis=1).tolist()

			        print(class_ids)
			        print(scores)

			        # Show the frame
			        cv2.imshow("Camera", resized_frame)

			        # Leave the loop when 'q' is pressed
			        if cv2.waitKey(1) & 0xFF == ord('q'):
			            break
			finally:
			    # Release the camera and close every window, even after an error
			    cap.release()
			    cv2.destroyAllWindows()

对比新文件
			@@ -0,0 +1,20 @@
			from deepspeech import Model
			import scipy.io.wavfile as wav
			import numpy as np

			# æå®æ¨¡ååè¯åå¨æä»¶è·¯å¾
			model_path = 'model/deepspeech-0.9.3-models-zh-CN.pbmm'
			scorer_path = 'model/deepspeech-0.9.3-models-zh-CN.scorer'

			# Build the DeepSpeech model and attach the external scorer
			ds = Model(model_path)
			ds.enableExternalScorer(scorer_path)

			# Load the audio file
			audio_path = 'audio/input.wav'
			fs, audio = wav.read(audio_path)

			# The model decodes audio at one fixed sample rate; warn when the file
			# uses a different one, since the transcription would be unreliable.
			expected_rate = ds.sampleRate()
			if fs != expected_rate:
			    print('Warning: {} is sampled at {} Hz but the model expects {} Hz'.format(
			        audio_path, fs, expected_rate))

			# Run speech-to-text
			text = ds.stt(audio)

			print('Transcribed text:', text)

对比新文件
			@@ -0,0 +1,5 @@
			from pydub import AudioSegment

			# Load the MP3 file
			mp3_audio = AudioSegment.from_mp3("input.mp3")

对比新文件
			@@ -0,0 +1 @@
			from faster import WhisperModel

对比新文件
			@@ -0,0 +1,10 @@
			import whisper
			# Load the "medium" Whisper checkpoint and transcribe the sample recording
			transcriber = whisper.load_model("medium")
			transcription = transcriber.transcribe("audio/input.wav")

			# Print only the recognised text
			print(transcription["text"])

			# tiny 72.1M æè¯´é®å¤§å®¶è¿ä¸ªè¯éç»æ¾å¨åå°äºæ¯å¡ç½åå°æ¯åå¯¹å¥½çè°¢è°¢
			# base 1XXM æå¸å¥é£ä¸ªååç»ä»æ¾å¨åå°äºæ¯7å·äºåå°æ¯å§å¯¹å¥½çè°¢è°¢å¥½
			# small 461M åä½ å¥½å¦æé é¢¨çä¸åç«å°çµ¦æ¾å¨åå°äºæ¯7èæ¨åå°æ¯å§å°å¥½çè¬è¬
			# medium 1.42G åä½ å¥½ä½ å¥½æèº«åé£å¿æä¸ªæ³å¾ç»æ¾å¨åå°äºæ¯7å·æ¥¼åå°æ¯å§å¯¹å¥½çè°¢è°¢å¥½