From 776a127b8db01cd4338f4db2a84ea567a65bff9f Mon Sep 17 00:00:00 2001 From: baoshiwei <baoshiwei@shlanbao.cn> Date: 星期二, 15 四月 2025 13:52:04 +0800 Subject: [PATCH] rjuq --- pachong.py | 36 +++++++++++++++++++++++++----------- 1 files changed, 25 insertions(+), 11 deletions(-) diff --git a/pachong.py b/pachong.py index 4841de4..d82db8f 100644 --- a/pachong.py +++ b/pachong.py @@ -1,13 +1,20 @@ +import concurrent.futures import os import time import requests import re + def imgdata_set(save_path, word, epoch): + if not os.path.exists(save_path): + os.makedirs(save_path) + else: + return 0 q = 0 # 鍋滄鐖彇鍥剧墖鏉′欢 a = 0 # 鍥剧墖鍚嶇О while(True): time.sleep(1) + print("寮�濮嬬埇鍙栧浘鐗�") url = "https://image.baidu.com/search/flip?tn=baiduimage&ie=utf-8&word={}&pn={}&ct=&ic=0&lm=-1&width=0&height=0".format(word, q) # word=闇�瑕佹悳绱㈢殑鍚嶅瓧 headers = { @@ -16,21 +23,28 @@ response = requests.get(url, headers=headers) # 鍙戦�佽姹傝幏鍙栧搷搴� html = response.text # 鑾峰彇鍝嶅簲鐨凥TML鍐呭 urls = re.findall('"objURL":"(.*?)"', html) # 浣跨敤姝e垯琛ㄨ揪寮忔彁鍙栧浘鐗嘦RL - for url in urls: - try: - print(a) # 鍥剧墖鐨勫悕瀛� - response = requests.get(url, headers=headers) # 鍙戦�佽姹傝幏鍙栧浘鐗囧搷搴擡:\yaocai\juhua - image = response.content # 鑾峰彇鍥剧墖鍐呭 - with open(os.path.join(save_path, "{}.jpg".format(a)), 'wb') as f: # 灏嗗浘鐗囧唴瀹逛繚瀛樺埌鎸囧畾璺緞 - f.write(image) - a = a + 1 - except Exception as e: - pass - continue + print(len(urls)) + # 浣跨敤concurrent.futures瀹炵幇骞跺彂涓嬭浇 + with concurrent.futures.ThreadPoolExecutor( max_workers=10) as executor: + # 鎻愪氦鎵�鏈変笅杞戒换鍔″苟鏀堕泦future瀵硅薄 + futures = [executor.submit(download_image, index, headers,save_path,url ) for index,url in enumerate(urls)] q = q + 20 if (q / 20) >= int(epoch): break + +def download_image(a, headers, save_path, url): + try: + print(a) # 鍥剧墖鐨勫悕瀛� + response = requests.get(url, headers=headers, timeout=10) # 鍙戦�佽姹傝幏鍙栧浘鐗囧搷搴� + # 濡傛灉娌℃湁涓�鐩村搷搴旀�庝箞澶勭悊 + image = response.content # 鑾峰彇鍥剧墖鍐呭 + with open(os.path.join(save_path, "{}.jpg".format(a)), 'wb') as f: # 灏嗗浘鐗囧唴瀹逛繚瀛樺埌鎸囧畾璺緞 + f.write(image) + except Exception as e: + pass + + if __name__ == "__main__": save_path = input('浣犳兂淇濆瓨鐨勮矾寰勶細') # 璇㈤棶鐢ㄦ埛淇濆瓨璺緞 word = input('浣犳兂瑕佷笅杞戒粈涔堝浘鐗囷紵璇疯緭鍏�:') # 璇㈤棶鐢ㄦ埛鎼滅储鍏抽敭璇� -- Gitblit v1.9.3