#!/usr/bin/env python
# -*- coding:utf8 -*-
# author: 18793
# Date:2019/5/22 16:42
# filename: 正则表达式.py
import urllib.request
import os
import re
# Page whose HTML is fetched below and scanned for image links.
url = "http://699pic.com/zhuanti/liuyiertongjie.html"
def findallimageurl(htmlstr):
    """Return every PNG/JPG image URL found in a chunk of HTML text.

    Only absolute ``http://`` URLs that end in ``.png`` or ``.jpg`` are
    matched.

    :param htmlstr: HTML source text to scan.
    :return: list of matched URL strings in order of appearance (duplicates
        are kept); an empty list when nothing matches.
    """
    # The extension dot must be escaped as ``\.``: the previous spelling
    # ``\png`` used ``\p``, which is not a valid escape in ``re`` patterns and
    # makes the call raise ``re.error`` instead of matching ``.png``.
    # Quotes and angle brackets are excluded from the URL body so that a
    # greedy match cannot run across several URLs that are separated only by
    # punctuation (e.g. ``"http://a/1.jpg","http://b/2.jpg"``).
    pattern = r'''http://[^\s"'<>]+\.(?:png|jpg)'''
    return re.findall(pattern, htmlstr)
def getfilename(urlstr):
    """Extract the file name (last path segment) from an image URL.

    :param urlstr: image URL, e.g. ``http://host/dir/pic.jpg``.
    :return: the text after the last ``/`` of the URL's path part; the whole
        string when it contains no ``/``; an empty string when the path ends
        with ``/``.
    """
    # Drop any fragment and query string first: they are not part of the
    # resource name, may themselves contain '/', and '?' is not allowed in
    # file names on some platforms.
    path = urlstr.split('#', 1)[0].split('?', 1)[0]
    return path.rsplit('/', 1)[-1]
# Fetch the page at `url` and collect the image URLs it references.
req = urllib.request.Request(url)
with urllib.request.urlopen(req) as response:
    data = response.read()
# errors='replace': only the URLs are extracted from the page, so a stray
# byte that is not valid GBK should not abort the whole run.
htmlstr = data.decode('gbk', errors='replace')
url_list = findallimageurl(htmlstr)

# dict.fromkeys() drops repeated URLs while keeping first-seen order, so the
# same image is not downloaded and written more than once.
for imagesrc in dict.fromkeys(url_list):
    # Download the image bytes.
    req = urllib.request.Request(imagesrc)
    try:
        with urllib.request.urlopen(req) as response:
            data = response.read()
    except OSError as err:
        # urllib's URLError/HTTPError derive from OSError; one unreachable
        # image should not stop the remaining downloads.
        print("下载失败", imagesrc, err)
        continue
    # Skip images smaller than 100 KB.
    # NOTE(review): the earlier comment said 10 KB, but the code has always
    # compared against 1024 * 100; the code's threshold is kept - confirm.
    if len(data) < 1024 * 100:
        continue
    # Create the output folder on first use; exist_ok avoids the separate
    # exists-then-create check.
    os.makedirs('download', exist_ok=True)
    # Target path: download/<file name taken from the URL>.
    filename = 'download/' + getfilename(imagesrc)
    # Save the image to disk.
    with open(filename, 'wb') as f:
        f.write(data)
    print("下载图片", filename)