From 776a127b8db01cd4338f4db2a84ea567a65bff9f Mon Sep 17 00:00:00 2001
From: baoshiwei <baoshiwei@shlanbao.cn>
Date: Tue, 15 Apr 2025 13:52:04 +0800
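Subject: [PATCH] rjuq

Download the images of each result page through a ThreadPoolExecutor
(10 workers) using a new download_image() helper, give image requests a
10 second timeout, and make imgdata_set() create save_path, returning 0
early when that directory already exists.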

---
 pachong.py |   36 +++++++++++++++++++++++++-----------
 1 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/pachong.py b/pachong.py
index 4841de4..d82db8f 100644
--- a/pachong.py
+++ b/pachong.py
@@ -1,13 +1,20 @@
+import concurrent.futures
 import os
 import time
 import requests
 import re
 
+
 def imgdata_set(save_path, word, epoch):
+    if not os.path.exists(save_path):
+        os.makedirs(save_path)
+    else:
+        return 0
     q = 0     # 鍋滄鐖彇鍥剧墖鏉′欢
     a = 0     # 鍥剧墖鍚嶇О
     while(True):
         time.sleep(1)
+        print("寮�濮嬬埇鍙栧浘鐗�")
         url = "https://image.baidu.com/search/flip?tn=baiduimage&ie=utf-8&word={}&pn={}&ct=&ic=0&lm=-1&width=0&height=0".format(word, q)
         # word=闇�瑕佹悳绱㈢殑鍚嶅瓧
         headers = {
@@ -16,21 +23,28 @@
         response = requests.get(url, headers=headers)  # 鍙戦�佽姹傝幏鍙栧搷搴�
         html = response.text  # 鑾峰彇鍝嶅簲鐨凥TML鍐呭
         urls = re.findall('"objURL":"(.*?)"', html)  # 浣跨敤姝e垯琛ㄨ揪寮忔彁鍙栧浘鐗嘦RL
-        for url in urls:
-            try:
-                print(a)  # 鍥剧墖鐨勫悕瀛�
-                response = requests.get(url, headers=headers)  # 鍙戦�佽姹傝幏鍙栧浘鐗囧搷搴擡:\yaocai\juhua
-                image = response.content  # 鑾峰彇鍥剧墖鍐呭
-                with open(os.path.join(save_path, "{}.jpg".format(a)), 'wb') as f:  # 灏嗗浘鐗囧唴瀹逛繚瀛樺埌鎸囧畾璺緞
-                    f.write(image)
-                a = a + 1
-            except Exception as e:
-                pass
-            continue
+        print(len(urls))
+        with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:  # leaving the block waits for every download
+            for index, url in enumerate(urls, start=a):  # number from the running counter, not from 0 on every page
+                executor.submit(download_image, index, headers, save_path, url)
+        a += len(urls)  # advance the counter so the next page cannot overwrite this page's files
         q = q + 20
         if (q / 20) >= int(epoch):
             break
 
+
+def download_image(a, headers, save_path, url):
+    """Download url into save_path as "<a>.jpg"; on any failure print the error and return None."""
+    try:
+        print(a)  # name (index) of the image being fetched
+        response = requests.get(url, headers=headers, timeout=10)  # give up on an unresponsive server
+        response.raise_for_status()  # do not store an HTTP error page as a .jpg
+        with open(os.path.join(save_path, "{}.jpg".format(a)), 'wb') as f:
+            f.write(response.content)
+    except Exception as e:  # best effort: one bad URL must not stop the other downloads
+        print("failed to download {}: {}".format(url, e))
+
+
 if __name__ == "__main__":
     save_path = input('浣犳兂淇濆瓨鐨勮矾寰勶細')  # 璇㈤棶鐢ㄦ埛淇濆瓨璺緞
     word = input('浣犳兂瑕佷笅杞戒粈涔堝浘鐗囷紵璇疯緭鍏�:')  # 璇㈤棶鐢ㄦ埛鎼滅储鍏抽敭璇�

--
Gitblit v1.9.3