import os
|
import time
|
import requests
|
import re
|
|
def imgdata_set(save_path, word, epoch):
    """Download Baidu image-search results for ``word`` into ``save_path``.

    Result pages are requested one per round, advancing the ``pn`` offset by
    20 each time. Every ``objURL`` found in a page is downloaded and written
    as ``<n>.jpg``, where ``n`` counts up from 0 across all rounds.

    Args:
        save_path: Directory the images are written to. A non-empty path is
            created if it does not exist yet; an empty string means the
            current working directory.
        word: Search keyword. It is passed as a query parameter, so it is
            URL-encoded by ``requests``.
        epoch: Number of result pages (rounds) to fetch. Anything ``int()``
            accepts, e.g. the string returned by ``input()``. Zero or a
            negative value downloads nothing.

    Raises:
        ValueError, TypeError: If ``epoch`` cannot be converted to an
            integer. Raised before any request is sent.
        requests.RequestException: If a search-result page cannot be
            fetched. Failures of individual image downloads are reported
            on stdout and skipped instead.
    """
    # Convert once, up front, so a bad value fails before any download starts.
    rounds = int(epoch)

    search_url = "https://image.baidu.com/search/flip"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Edg/88.0.705.56'
    }
    obj_url_pattern = re.compile(r'"objURL":"(.*?)"')
    page_step = 20
    timeout = 10  # seconds; without it requests may wait on a dead server forever

    # os.makedirs("") raises, while os.path.join("", name) already targets the
    # current directory, so only create the directory for a non-empty path.
    if save_path:
        os.makedirs(save_path, exist_ok=True)

    image_index = 0  # file name of the next image to be saved
    for page in range(rounds):
        time.sleep(1)  # pause between result pages

        # Passing the query as ``params`` lets requests escape the keyword.
        params = {
            "tn": "baiduimage",
            "ie": "utf-8",
            "word": word,
            "pn": page * page_step,
            "ct": "",
            "ic": 0,
            "lm": -1,
            "width": 0,
            "height": 0,
        }
        response = requests.get(
            search_url, params=params, headers=headers, timeout=timeout
        )
        image_urls = obj_url_pattern.findall(response.text)

        for image_url in image_urls:
            print(image_index)  # name of the image about to be fetched
            try:
                image_response = requests.get(
                    image_url, headers=headers, timeout=timeout
                )
                # Do not store an HTTP error page under a .jpg name.
                image_response.raise_for_status()
                target = os.path.join(save_path, "{}.jpg".format(image_index))
                with open(target, 'wb') as f:
                    f.write(image_response.content)
            except (requests.RequestException, OSError) as exc:
                # Best effort: report the failure and move on to the next URL.
                print("skipped {}: {}".format(image_url, exc))
                continue
            image_index += 1
|
|
if __name__ == "__main__":
    # Ask for the three settings interactively, in this order, then start
    # the download.
    target_dir = input('你想保存的路径:')  # directory the images are saved to
    keyword = input('你想要下载什么图片?请输入:')  # search keyword
    rounds = input('你想要下载几轮图片?请输入(一轮为60张左右图片):')  # number of rounds to fetch

    imgdata_set(save_path=target_dir, word=keyword, epoch=rounds)
|