妹子图片
爬取清纯妹子图片,保存到 D 盘下的 image 目录。
不多说废话,直接上码。我偷偷打码了 0.0
#!/usr/bin/Python
# -*- coding: utf-8 -*-
import urllib2
import urllib
import random
import socket
from bs4 import BeautifulSoup
from time import sleep
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; windows NT)'
headers = { 'User-Agent' : user_agent }
f = open("2.txt","w")
i = 0
a = 0
while i<1:
url = 'http://www.mmjpg.com/home/'+str(i)
socket.setdefaulttimeout(5)
req = urllib2.request(url, headers = headers)
content_stream = urllib2.urlopen(req)
content_stream = content_stream.read()
soup = BeautifulSoup(content_stream,'html5lib')
soupa = soup.select("ul > li > a")
# soupa = soupa['src']
print soupa
for j in soupa:
# print j['href']
name = j.select('img')[0]
# print name['alt']
url = j['href']
socket.setdefaulttimeout(5)
req = urllib2.Request(url, headers = headers)
try:
content_stream = urllib2.urlopen(req)
content_stream = content_stream.read()
soup = BeautifulSoup(content_stream,'html5lib')
soup = soup.select('#page > a')
imgs = soup[-2].get_text()
print imgs
lists = 1
while lists<=int(imgs):
print lists
url = j['href']
socket.setdefaulttimeout(5)
req = urllib2.Request(url+'/'+str(lists), headers = headers)
try:
content_stream = urllib2.urlopen(req)
content_stream = content_stream.read()
soup = BeautifulSoup(content_stream,'html5lib')
soupa = soup.select('#content img')[0]
print soupa
lists+=1
f.write(soupa['src'].encode('utf-8').strip()+'\n')
path = 'D:\\image\\'+name['alt']+str(lists)+'.jpg'
urllib.urlretrieve(soupa['src'],path)
sleep(random.uniform(0.5,1))
except Exception,e:
print u'漏掉一张图片'
sleep(random.uniform(0.5,1))
except Exception,e:
print u'漏掉一个妹子'
sleep(random.uniform(0.5,1))
i+=1
相关阅读
网站:http://www.meizitu.com/ 目标:用BeautifulSoup解析网页源代码,获取图片. 图片链接: # /home/wl/PycharmProjects/untitled
现在图章的使用可以说是很广泛的,不管是个人还是企业都或多或少有着他们自己专属的图章,更多的是运用在商户,个人爱好喜欢制作图章的