| | |
| | | import concurrent.futures |
| | | import os |
| | | import time |
| | | import requests |
| | | import re |
| | | |
| | | |
def imgdata_set(save_path, word, epoch):
    """Crawl Baidu image-search results for *word* and save them as JPEGs.

    Fetches ``epoch`` result pages (Baidu paginates 20 results per page via
    the ``pn`` parameter), extracts the ``objURL`` image links from each
    page, and downloads them concurrently with a thread pool.

    Args:
        save_path: Directory to store the images in; created if missing.
        word: Search keyword to query.
        epoch: Number of result pages (20 images each) to crawl.

    Returns:
        0 if *save_path* already exists (crawl is skipped entirely),
        otherwise None.
    """
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    else:
        # Directory already present from an earlier run -- do not re-crawl.
        return 0
    # Baidu rejects the default python-requests User-Agent, so send a
    # browser-like one.  (The original code had an empty/truncated headers
    # dict here, which was a syntax error.)
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/88.0.4324.150 Safari/537.36"
        )
    }
    image_index = 0  # Global counter so filenames never collide across pages.
    for page in range(int(epoch)):
        time.sleep(1)  # Be polite: throttle page requests.
        print("开始爬取图片")
        page_url = (
            "https://image.baidu.com/search/flip?tn=baiduimage&ie=utf-8"
            "&word={}&pn={}&ct=&ic=0&lm=-1&width=0&height=0"
        ).format(word, page * 20)
        try:
            # Timeout so one stalled page request cannot hang the crawl.
            html = requests.get(page_url, headers=headers, timeout=10).text
        except requests.RequestException as err:
            print("Failed to fetch result page {}: {}".format(page, err))
            continue
        # Pull the full-size image URLs embedded in the page source.
        urls = re.findall('"objURL":"(.*?)"', html)
        print(len(urls))
        # Download this page's images concurrently.  Indices continue from
        # image_index so later pages do not overwrite earlier files (the
        # original enumerate() restarted at 0 on every page, and the images
        # were additionally downloaded a second time by a sequential loop).
        with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
            for offset, img_url in enumerate(urls):
                executor.submit(
                    download_image, image_index + offset, headers, save_path, img_url
                )
            # Exiting the `with` block waits for all submitted downloads.
        image_index += len(urls)
| | | |
| | | |
def download_image(a, headers, save_path, url):
    """Download one image from *url* and save it as ``{a}.jpg`` in *save_path*.

    Designed to run in a worker thread: failures are reported and swallowed
    so that one bad URL does not abort the rest of the batch.

    Args:
        a: Numeric index used as the output file name.
        headers: HTTP headers (e.g. User-Agent) to send with the request.
        save_path: Destination directory; assumed to already exist.
        url: Direct image URL to fetch.
    """
    try:
        print(a)  # Progress indicator: which image index is being fetched.
        response = requests.get(url, headers=headers, timeout=10)
        # Treat HTTP errors (404, 403, ...) as failures instead of saving
        # the error page body as a .jpg.
        response.raise_for_status()
        with open(os.path.join(save_path, "{}.jpg".format(a)), "wb") as f:
            f.write(response.content)
    except Exception as err:
        # Best-effort crawl: log the failure rather than silently dropping
        # it (the original bare `pass` hid every error).
        print("Failed to download {}: {}".format(url, err))
| | | |
| | | |
| | | if __name__ == "__main__": |
| | | save_path = input('你想保存的路径:') # 询问用户保存路径 |
| | | word = input('你想要下载什么图片?请输入:') # 询问用户搜索关键词 |