| | |
| | | import concurrent.futures |
| | | import os |
| | | import time |
| | | import requests |
| | | import re |
| | | |
| | | |
def imgdata_set(save_path, word, epoch):
    """Crawl Baidu image-search results for *word* and save them as JPEGs.

    Fetches ``epoch`` result pages (Baidu paginates 20 results per page via
    the ``pn`` parameter), extracts the ``objURL`` image links from each
    page, and downloads them concurrently with a thread pool.

    Args:
        save_path: Directory to store the images in; created if missing.
        word: Search keyword to query.
        epoch: Number of result pages (20 images each) to crawl.

    Returns:
        0 if *save_path* already exists (crawl is skipped entirely),
        otherwise None.
    """
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    else:
        # Directory already present from an earlier run -- do not re-crawl.
        return 0
    # Baidu rejects the default python-requests User-Agent, so send a
    # browser-like one.  (The original code had an empty/truncated headers
    # dict here, which was a syntax error.)
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/88.0.4324.150 Safari/537.36"
        )
    }
    image_index = 0  # Global counter so filenames never collide across pages.
    for page in range(int(epoch)):
        time.sleep(1)  # Be polite: throttle page requests.
        print("开始爬取图片")
        page_url = (
            "https://image.baidu.com/search/flip?tn=baiduimage&ie=utf-8"
            "&word={}&pn={}&ct=&ic=0&lm=-1&width=0&height=0"
        ).format(word, page * 20)
        try:
            # Timeout so one stalled page request cannot hang the crawl.
            html = requests.get(page_url, headers=headers, timeout=10).text
        except requests.RequestException as err:
            print("Failed to fetch result page {}: {}".format(page, err))
            continue
        # Pull the full-size image URLs embedded in the page source.
        urls = re.findall('"objURL":"(.*?)"', html)
        print(len(urls))
        # Download this page's images concurrently.  Indices continue from
        # image_index so later pages do not overwrite earlier files (the
        # original enumerate() restarted at 0 on every page, and the images
        # were additionally downloaded a second time by a sequential loop).
        with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
            for offset, img_url in enumerate(urls):
                executor.submit(
                    download_image, image_index + offset, headers, save_path, img_url
                )
            # Exiting the `with` block waits for all submitted downloads.
        image_index += len(urls)
| | | |
| | | |
def download_image(a, headers, save_path, url):
    """Download one image from *url* and save it as ``{a}.jpg`` in *save_path*.

    Designed to run in a worker thread: failures are reported and swallowed
    so that one bad URL does not abort the rest of the batch.

    Args:
        a: Numeric index used as the output file name.
        headers: HTTP headers (e.g. User-Agent) to send with the request.
        save_path: Destination directory; assumed to already exist.
        url: Direct image URL to fetch.
    """
    try:
        print(a)  # Progress indicator: which image index is being fetched.
        response = requests.get(url, headers=headers, timeout=10)
        # Treat HTTP errors (404, 403, ...) as failures instead of saving
        # the error page body as a .jpg.
        response.raise_for_status()
        with open(os.path.join(save_path, "{}.jpg".format(a)), "wb") as f:
            f.write(response.content)
    except Exception as err:
        # Best-effort crawl: log the failure rather than silently dropping
        # it (the original bare `pass` hid every error).
        print("Failed to download {}: {}".format(url, err))
| | | |
| | | |
| | | if __name__ == "__main__": |
| | | save_path = input('你想保存的路径:') # 询问用户保存路径 |
| | | word = input('你想要下载什么图片?请输入:') # 询问用户搜索关键词 |