"""全书网 (quanshuwang.com) novel scraper.

爬取流程 (crawl flow):
  第一步 (Step 1): request a category listing page; parse each novel's link, title, author, intro.
  第二步 (Step 2): open the novel's detail page and locate the "开始阅读" (start reading) link.
  第三步 (Step 3): fetch the chapter index and collect every chapter title and link.
  第四步 (Step 4): fetch each chapter page and extract its title and body text.
  第五步 (Step 5): append the chapter text to a per-chapter .txt file.
代码流程 (code flow): qingqiuN -> yuedxqN -> hqzangjN -> readxqN -> writesN, one chain per category.
"""
import requests
import re
import json
from lxml import etree
import urllib.parse
import urllib
# Shared HTTP request headers: a desktop-Chrome User-Agent so the site serves
# the normal HTML pages. NOTE(review): the casing "APPleWebKit" / "linux"
# differs from a stock Chrome UA string — presumably harmless, but confirm the
# site does not fingerprint the exact UA.
header = {
"User-Agent":"Mozilla/5.0 (X11; linux x86_64) APPleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36",
}
# Step 1: request a listing page, parse each novel's link / title / author /
# intro from the <li> entries, then follow the novel's detail page.
def qingqiu(url):
    """Crawl one listing page of category 1 and follow every novel on it.

    url: absolute URL of a listing page (e.g. .../list/1_1.html).
    Side effects: prints parsed fields; recurses into yuedxq() per novel.
    """
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'  # the site serves GBK-encoded pages
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//ul[@class="seeWell cf"]/li')
    for i in b:
        # novel detail link (relative href -> joined against response.url)
        jiexi = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        jiexi_title = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@title')
        jiexi_zz = i.xpath('.//span[@class="l"]/a[2]/text()')
        jiexi_nr = i.xpath('.//span[@class="l"]/em[@class="c999 clearfix"]/text()')
        print(pinjie, jiexi_title, jiexi_zz, jiexi_nr)
        # BUG FIX: was yuedxq(jiexi) -- jiexi is a *relative* href, so
        # requests.get() inside yuedxq() would raise MissingSchema; the
        # absolute URL pinjie was computed but never used.
        yuedxq(pinjie)

# Step 2: from the novel detail page, follow the "start reading" link.
def yuedxq(url):
    """Fetch a novel detail page and follow its reader ("开始阅读") link."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//p[@class="detail"]/p[@class="b-info"]')
    for i in b:
        jiexi = i.xpath('.//p[@class="b-oper"]/a[@class="reader"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(pinjie)
        hqzangj(pinjie)

# Step 3: from the chapter index page, collect chapter titles and links.
def hqzangj(url):
    """Fetch the chapter index page and crawl every chapter listed on it."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//p[@class="clearfix dirconone"]/li')
    for i in b:
        title = i.xpath('./a/@title')
        jiexi = i.xpath('./a/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(title, pinjie)
        readxq(pinjie)

# Step 4: parse the chapter body text and chapter title.
def readxq(url):
    """Fetch one chapter page; extract title + body and append to a txt file."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//p[@class="mainContenr"]/text()')
    c = a.xpath('//strong[@class="l jieqi_title"]/text()')
    print(c, b)
    # FIX: use the first title string (with a fallback) so the output file is
    # named "<title>.txt" instead of the repr of a list, "['<title>'].txt".
    title = c[0].strip() if c else 'untitled'
    # walk the chapter text fragments
    for i in b:
        writes(i, title)

# Step 5: append one text fragment to the chapter's txt file.
def writes(t, c):
    """Append text t to the file "<c>.txt".

    encoding is made explicit: the platform-default codec (e.g. GBK on
    Chinese Windows, ASCII in some locales) may not cover every character.
    """
    with open('{}.txt'.format(c), 'a+', encoding='utf-8') as f:
        f.write(t)
        print("-------------------------------------------------------------------------------")
#第二个标签 武侠修真
# Category 2 (武侠修真): same five-step pipeline as qingqiu()..writes(),
# duplicated per category by the original author.
def qingqiu2(url_q):
    """Crawl one listing page of category 2 and follow every novel on it."""
    response = requests.get(url_q, headers=header)
    response.encoding = 'gbk'  # the site serves GBK-encoded pages
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//ul[@class="seeWell cf"]/li')
    for i in b:
        # novel detail link
        jiexi = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        jiexi_title = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@title')
        jiexi_zz = i.xpath('.//span[@class="l"]/a[2]/text()')
        jiexi_nr = i.xpath('.//span[@class="l"]/em[@class="c999 clearfix"]/text()')
        print(pinjie, jiexi_title, jiexi_zz, jiexi_nr)
        # BUG FIX: was yuedxq2(jiexi); jiexi is a relative href, pass the
        # joined absolute URL instead.
        yuedxq2(pinjie)

def yuedxq2(url):
    """Detail page -> follow the "start reading" link."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//p[@class="detail"]/p[@class="b-info"]')
    for i in b:
        jiexi = i.xpath('.//p[@class="b-oper"]/a[@class="reader"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(pinjie)
        hqzangj2(pinjie)

def hqzangj2(url):
    """Chapter index -> crawl every chapter."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//p[@class="clearfix dirconone"]/li')
    for i in b:
        title = i.xpath('./a/@title')
        jiexi = i.xpath('./a/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(title, pinjie)
        readxq2(pinjie)

def readxq2(url):
    """Chapter page -> extract title/body, append to a txt file."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//p[@class="mainContenr"]/text()')
    c = a.xpath('//strong[@class="l jieqi_title"]/text()')
    print(c, b)
    # FIX: first title string (not the raw list) so the filename is clean
    title = c[0].strip() if c else 'untitled'
    for i in b:
        writes2(i, title)

def writes2(t, c):
    """Append text t to "<c>.txt" (explicit UTF-8 so all characters fit)."""
    with open('{}.txt'.format(c), 'a+', encoding='utf-8') as f:
        f.write(t)
        print("-------------------------------------------------------------------------------")
# Category 3: same five-step pipeline as qingqiu()..writes().
def qingqiu3(url_w):
    """Crawl one listing page of category 3 and follow every novel on it."""
    response = requests.get(url_w, headers=header)
    response.encoding = 'gbk'  # the site serves GBK-encoded pages
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//ul[@class="seeWell cf"]/li')
    for i in b:
        # novel detail link
        jiexi = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        jiexi_title = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@title')
        jiexi_zz = i.xpath('.//span[@class="l"]/a[2]/text()')
        jiexi_nr = i.xpath('.//span[@class="l"]/em[@class="c999 clearfix"]/text()')
        print(pinjie, jiexi_title, jiexi_zz, jiexi_nr)
        # BUG FIX: was yuedxq3(jiexi); jiexi is a relative href.
        yuedxq3(pinjie)

def yuedxq3(url):
    """Detail page -> follow the "start reading" link."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//p[@class="detail"]/p[@class="b-info"]')
    for i in b:
        jiexi = i.xpath('.//p[@class="b-oper"]/a[@class="reader"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(pinjie)
        hqzangj3(pinjie)

def hqzangj3(url):
    """Chapter index -> crawl every chapter."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//p[@class="clearfix dirconone"]/li')
    for i in b:
        title = i.xpath('./a/@title')
        jiexi = i.xpath('./a/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(title, pinjie)
        readxq3(pinjie)

def readxq3(url):
    """Chapter page -> extract title/body, append to a txt file."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//p[@class="mainContenr"]/text()')
    c = a.xpath('//strong[@class="l jieqi_title"]/text()')
    print(c, b)
    # FIX: first title string (not the raw list) so the filename is clean
    title = c[0].strip() if c else 'untitled'
    for i in b:
        writes3(i, title)

def writes3(t, c):
    """Append text t to "<c>.txt" (explicit UTF-8 so all characters fit)."""
    with open('{}.txt'.format(c), 'a+', encoding='utf-8') as f:
        f.write(t)
        print("-------------------------------------------------------------------------------")
# Category 4: same five-step pipeline as qingqiu()..writes().
def qingqiu4(url_e):
    """Crawl one listing page of category 4 and follow every novel on it."""
    response = requests.get(url_e, headers=header)
    response.encoding = 'gbk'  # the site serves GBK-encoded pages
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//ul[@class="seeWell cf"]/li')
    for i in b:
        # novel detail link
        jiexi = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        jiexi_title = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@title')
        jiexi_zz = i.xpath('.//span[@class="l"]/a[2]/text()')
        jiexi_nr = i.xpath('.//span[@class="l"]/em[@class="c999 clearfix"]/text()')
        print(pinjie, jiexi_title, jiexi_zz, jiexi_nr)
        # BUG FIX: was yuedxq4(jiexi); jiexi is a relative href.
        yuedxq4(pinjie)

def yuedxq4(url):
    """Detail page -> follow the "start reading" link."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//p[@class="detail"]/p[@class="b-info"]')
    for i in b:
        jiexi = i.xpath('.//p[@class="b-oper"]/a[@class="reader"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(pinjie)
        hqzangj4(pinjie)

def hqzangj4(url):
    """Chapter index -> crawl every chapter."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//p[@class="clearfix dirconone"]/li')
    for i in b:
        title = i.xpath('./a/@title')
        jiexi = i.xpath('./a/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(title, pinjie)
        readxq4(pinjie)

def readxq4(url):
    """Chapter page -> extract title/body, append to a txt file."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//p[@class="mainContenr"]/text()')
    c = a.xpath('//strong[@class="l jieqi_title"]/text()')
    print(c, b)
    # FIX: first title string (not the raw list) so the filename is clean
    title = c[0].strip() if c else 'untitled'
    for i in b:
        writes4(i, title)

def writes4(t, c):
    """Append text t to "<c>.txt" (explicit UTF-8 so all characters fit)."""
    with open('{}.txt'.format(c), 'a+', encoding='utf-8') as f:
        f.write(t)
        print("-------------------------------------------------------------------------------")
# Category 5: same five-step pipeline as qingqiu()..writes().
def qingqiu5(url_e):
    """Crawl one listing page of category 5 and follow every novel on it."""
    response = requests.get(url_e, headers=header)
    response.encoding = 'gbk'  # the site serves GBK-encoded pages
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//ul[@class="seeWell cf"]/li')
    for i in b:
        # novel detail link
        jiexi = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        jiexi_title = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@title')
        jiexi_zz = i.xpath('.//span[@class="l"]/a[2]/text()')
        jiexi_nr = i.xpath('.//span[@class="l"]/em[@class="c999 clearfix"]/text()')
        print(pinjie, jiexi_title, jiexi_zz, jiexi_nr)
        # BUG FIX: was yuedxq5(jiexi); jiexi is a relative href.
        yuedxq5(pinjie)

def yuedxq5(url):
    """Detail page -> follow the "start reading" link."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//p[@class="detail"]/p[@class="b-info"]')
    for i in b:
        jiexi = i.xpath('.//p[@class="b-oper"]/a[@class="reader"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(pinjie)
        hqzangj5(pinjie)

def hqzangj5(url):
    """Chapter index -> crawl every chapter."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//p[@class="clearfix dirconone"]/li')
    for i in b:
        title = i.xpath('./a/@title')
        jiexi = i.xpath('./a/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(title, pinjie)
        readxq5(pinjie)

def readxq5(url):
    """Chapter page -> extract title/body, append to a txt file."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//p[@class="mainContenr"]/text()')
    c = a.xpath('//strong[@class="l jieqi_title"]/text()')
    print(c, b)
    # FIX: first title string (not the raw list) so the filename is clean
    title = c[0].strip() if c else 'untitled'
    for i in b:
        writes5(i, title)

def writes5(t, c):
    """Append text t to "<c>.txt" (explicit UTF-8 so all characters fit)."""
    with open('{}.txt'.format(c), 'a+', encoding='utf-8') as f:
        f.write(t)
        print("-------------------------------------------------------------------------------")
# Category 6: same five-step pipeline as qingqiu()..writes().
def qingqiu6(url_r):
    """Crawl one listing page of category 6 and follow every novel on it."""
    response = requests.get(url_r, headers=header)
    response.encoding = 'gbk'  # the site serves GBK-encoded pages
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//ul[@class="seeWell cf"]/li')
    for i in b:
        # novel detail link
        jiexi = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        jiexi_title = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@title')
        jiexi_zz = i.xpath('.//span[@class="l"]/a[2]/text()')
        jiexi_nr = i.xpath('.//span[@class="l"]/em[@class="c999 clearfix"]/text()')
        print(pinjie, jiexi_title, jiexi_zz, jiexi_nr)
        # BUG FIX: was yuedxq6(jiexi); jiexi is a relative href.
        yuedxq6(pinjie)

def yuedxq6(url):
    """Detail page -> follow the "start reading" link."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//p[@class="detail"]/p[@class="b-info"]')
    for i in b:
        jiexi = i.xpath('.//p[@class="b-oper"]/a[@class="reader"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(pinjie)
        hqzangj6(pinjie)

def hqzangj6(url):
    """Chapter index -> crawl every chapter."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//p[@class="clearfix dirconone"]/li')
    for i in b:
        title = i.xpath('./a/@title')
        jiexi = i.xpath('./a/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(title, pinjie)
        readxq6(pinjie)

def readxq6(url):
    """Chapter page -> extract title/body, append to a txt file."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//p[@class="mainContenr"]/text()')
    c = a.xpath('//strong[@class="l jieqi_title"]/text()')
    print(c, b)
    # FIX: first title string (not the raw list) so the filename is clean
    title = c[0].strip() if c else 'untitled'
    for i in b:
        writes6(i, title)

def writes6(t, c):
    """Append text t to "<c>.txt" (explicit UTF-8 so all characters fit)."""
    with open('{}.txt'.format(c), 'a+', encoding='utf-8') as f:
        f.write(t)
        print("-------------------------------------------------------------------------------")
# Category 7: same five-step pipeline as qingqiu()..writes().
def qingqiu7(url_t):
    """Crawl one listing page of category 7 and follow every novel on it."""
    response = requests.get(url_t, headers=header)
    response.encoding = 'gbk'  # the site serves GBK-encoded pages
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//ul[@class="seeWell cf"]/li')
    for i in b:
        # novel detail link
        jiexi = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        jiexi_title = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@title')
        jiexi_zz = i.xpath('.//span[@class="l"]/a[2]/text()')
        jiexi_nr = i.xpath('.//span[@class="l"]/em[@class="c999 clearfix"]/text()')
        print(pinjie, jiexi_title, jiexi_zz, jiexi_nr)
        # BUG FIX: was yuedxq7(jiexi); jiexi is a relative href.
        yuedxq7(pinjie)

def yuedxq7(url):
    """Detail page -> follow the "start reading" link."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//p[@class="detail"]/p[@class="b-info"]')
    for i in b:
        jiexi = i.xpath('.//p[@class="b-oper"]/a[@class="reader"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(pinjie)
        hqzangj7(pinjie)

def hqzangj7(url):
    """Chapter index -> crawl every chapter."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//p[@class="clearfix dirconone"]/li')
    for i in b:
        title = i.xpath('./a/@title')
        jiexi = i.xpath('./a/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(title, pinjie)
        readxq7(pinjie)

def readxq7(url):
    """Chapter page -> extract title/body, append to a txt file."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//p[@class="mainContenr"]/text()')
    c = a.xpath('//strong[@class="l jieqi_title"]/text()')
    print(c, b)
    # FIX: first title string (not the raw list) so the filename is clean
    title = c[0].strip() if c else 'untitled'
    for i in b:
        writes7(i, title)

def writes7(t, c):
    """Append text t to "<c>.txt" (explicit UTF-8 so all characters fit)."""
    with open('{}.txt'.format(c), 'a+', encoding='utf-8') as f:
        f.write(t)
        print("-------------------------------------------------------------------------------")
# Category 8: same five-step pipeline as qingqiu()..writes().
def qingqiu8(url_y):
    """Crawl one listing page of category 8 and follow every novel on it."""
    response = requests.get(url_y, headers=header)
    response.encoding = 'gbk'  # the site serves GBK-encoded pages
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//ul[@class="seeWell cf"]/li')
    for i in b:
        # novel detail link
        jiexi = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        jiexi_title = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@title')
        jiexi_zz = i.xpath('.//span[@class="l"]/a[2]/text()')
        jiexi_nr = i.xpath('.//span[@class="l"]/em[@class="c999 clearfix"]/text()')
        print(pinjie, jiexi_title, jiexi_zz, jiexi_nr)
        # BUG FIX: was yuedxq8(jiexi); jiexi is a relative href.
        yuedxq8(pinjie)

def yuedxq8(url):
    """Detail page -> follow the "start reading" link."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//p[@class="detail"]/p[@class="b-info"]')
    for i in b:
        jiexi = i.xpath('.//p[@class="b-oper"]/a[@class="reader"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(pinjie)
        hqzangj8(pinjie)

def hqzangj8(url):
    """Chapter index -> crawl every chapter."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//p[@class="clearfix dirconone"]/li')
    for i in b:
        title = i.xpath('./a/@title')
        jiexi = i.xpath('./a/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(title, pinjie)
        readxq8(pinjie)

def readxq8(url):
    """Chapter page -> extract title/body, append to a txt file."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//p[@class="mainContenr"]/text()')
    c = a.xpath('//strong[@class="l jieqi_title"]/text()')
    print(c, b)
    # FIX: first title string (not the raw list) so the filename is clean
    title = c[0].strip() if c else 'untitled'
    for i in b:
        writes8(i, title)

def writes8(t, c):
    """Append text t to "<c>.txt" (explicit UTF-8 so all characters fit)."""
    with open('{}.txt'.format(c), 'a+', encoding='utf-8') as f:
        f.write(t)
        print("-------------------------------------------------------------------------------")
# Category 9: same five-step pipeline as qingqiu()..writes().
def qingqiu9(url_u):
    """Crawl one listing page of category 9 and follow every novel on it."""
    response = requests.get(url_u, headers=header)
    response.encoding = 'gbk'  # the site serves GBK-encoded pages
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//ul[@class="seeWell cf"]/li')
    for i in b:
        # novel detail link
        jiexi = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        jiexi_title = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@title')
        jiexi_zz = i.xpath('.//span[@class="l"]/a[2]/text()')
        jiexi_nr = i.xpath('.//span[@class="l"]/em[@class="c999 clearfix"]/text()')
        print(pinjie, jiexi_title, jiexi_zz, jiexi_nr)
        # BUG FIX: was yuedxq9(jiexi); jiexi is a relative href.
        yuedxq9(pinjie)

def yuedxq9(url):
    """Detail page -> follow the "start reading" link."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//p[@class="detail"]/p[@class="b-info"]')
    for i in b:
        jiexi = i.xpath('.//p[@class="b-oper"]/a[@class="reader"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(pinjie)
        hqzangj9(pinjie)

def hqzangj9(url):
    """Chapter index -> crawl every chapter."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//p[@class="clearfix dirconone"]/li')
    for i in b:
        title = i.xpath('./a/@title')
        jiexi = i.xpath('./a/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(title, pinjie)
        readxq9(pinjie)

def readxq9(url):
    """Chapter page -> extract title/body, append to a txt file."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//p[@class="mainContenr"]/text()')
    c = a.xpath('//strong[@class="l jieqi_title"]/text()')
    print(c, b)
    # FIX: first title string (not the raw list) so the filename is clean
    title = c[0].strip() if c else 'untitled'
    for i in b:
        writes9(i, title)

def writes9(t, c):
    """Append text t to "<c>.txt" (explicit UTF-8 so all characters fit)."""
    with open('{}.txt'.format(c), 'a+', encoding='utf-8') as f:
        f.write(t)
        print("-------------------------------------------------------------------------------")
# Category 10: same five-step pipeline as qingqiu()..writes().
def qingqiu10(url_i):
    """Crawl one listing page of category 10 and follow every novel on it."""
    response = requests.get(url_i, headers=header)
    response.encoding = 'gbk'  # the site serves GBK-encoded pages
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//ul[@class="seeWell cf"]/li')
    for i in b:
        # novel detail link
        jiexi = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        jiexi_title = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@title')
        jiexi_zz = i.xpath('.//span[@class="l"]/a[2]/text()')
        jiexi_nr = i.xpath('.//span[@class="l"]/em[@class="c999 clearfix"]/text()')
        print(pinjie, jiexi_title, jiexi_zz, jiexi_nr)
        # BUG FIX: was yuedxq10(jiexi); jiexi is a relative href.
        yuedxq10(pinjie)

def yuedxq10(url):
    """Detail page -> follow the "start reading" link."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//p[@class="detail"]/p[@class="b-info"]')
    for i in b:
        jiexi = i.xpath('.//p[@class="b-oper"]/a[@class="reader"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(pinjie)
        hqzangj10(pinjie)

def hqzangj10(url):
    """Chapter index -> crawl every chapter."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//p[@class="clearfix dirconone"]/li')
    for i in b:
        title = i.xpath('./a/@title')
        jiexi = i.xpath('./a/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(title, pinjie)
        readxq10(pinjie)

def readxq10(url):
    """Chapter page -> extract title/body, append to a txt file."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//p[@class="mainContenr"]/text()')
    c = a.xpath('//strong[@class="l jieqi_title"]/text()')
    print(c, b)
    # FIX: first title string (not the raw list) so the filename is clean
    title = c[0].strip() if c else 'untitled'
    for i in b:
        writes10(i, title)

def writes10(t, c):
    """Append text t to "<c>.txt" (explicit UTF-8 so all characters fit)."""
    with open('{}.txt'.format(c), 'a+', encoding='utf-8') as f:
        f.write(t)
        print("-------------------------------------------------------------------------------")
# Category 11: same five-step pipeline as qingqiu()..writes().
def qingqiu11(url_o):
    """Crawl one listing page of category 11 and follow every novel on it."""
    response = requests.get(url_o, headers=header)
    response.encoding = 'gbk'  # the site serves GBK-encoded pages
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//ul[@class="seeWell cf"]/li')
    for i in b:
        # novel detail link
        jiexi = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        jiexi_title = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@title')
        jiexi_zz = i.xpath('.//span[@class="l"]/a[2]/text()')
        jiexi_nr = i.xpath('.//span[@class="l"]/em[@class="c999 clearfix"]/text()')
        print(pinjie, jiexi_title, jiexi_zz, jiexi_nr)
        # BUG FIX: was yuedxq11(jiexi); jiexi is a relative href.
        yuedxq11(pinjie)

def yuedxq11(url):
    """Detail page -> follow the "start reading" link."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//p[@class="detail"]/p[@class="b-info"]')
    for i in b:
        jiexi = i.xpath('.//p[@class="b-oper"]/a[@class="reader"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(pinjie)
        hqzangj11(pinjie)

def hqzangj11(url):
    """Chapter index -> crawl every chapter."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//p[@class="clearfix dirconone"]/li')
    for i in b:
        title = i.xpath('./a/@title')
        jiexi = i.xpath('./a/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(title, pinjie)
        readxq11(pinjie)

def readxq11(url):
    """Chapter page -> extract title/body, append to a txt file."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//p[@class="mainContenr"]/text()')
    c = a.xpath('//strong[@class="l jieqi_title"]/text()')
    print(c, b)
    # FIX: first title string (not the raw list) so the filename is clean
    title = c[0].strip() if c else 'untitled'
    for i in b:
        writes11(i, title)

def writes11(t, c):
    """Append text t to "<c>.txt" (explicit UTF-8 so all characters fit)."""
    with open('{}.txt'.format(c), 'a+', encoding='utf-8') as f:
        f.write(t)
        print("-------------------------------------------------------------------------------")
# Category 12: listing -> detail -> reader page. Unlike the other chains this
# one stops at jinru() and only prints links (chapter crawling was never
# wired up here).
def qingqiu12(url_p):
    """Crawl one listing page of category 12 and follow every novel on it."""
    response = requests.get(url_p, headers=header)
    response.encoding = 'gbk'  # the site serves GBK-encoded pages
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//ul[@class="seeWell cf"]/li')
    for i in b:
        # novel detail link
        jiexi = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        jiexi_title = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@title')
        jiexi_zz = i.xpath('.//span[@class="l"]/a[2]/text()')
        jiexi_nr = i.xpath('.//span[@class="l"]/em[@class="c999 clearfix"]/text()')
        print(pinjie, jiexi_title, jiexi_zz, jiexi_nr)
        # BUG FIX: was yuedxq12(jiexi); jiexi is a relative href, so
        # requests.get() in yuedxq12() would raise MissingSchema.
        yuedxq12(pinjie)

def yuedxq12(url):
    """Detail page -> follow the "start reading" link into jinru()."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//p[@class="detail"]/p[@class="b-info"]')
    for i in b:
        jiexi = i.xpath('.//p[@class="b-oper"]/a[@class="reader"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(pinjie)
        jinru(pinjie)

def jinru(url):
    """Reader page: print the absolute URL of each a.orange link found."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//p[@class="item"]/ul')
    for i in b:
        jiexi = i.xpath('.//li/a[@class="orange"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(pinjie)
if __name__ == '__main__':
    # Crawl category 1; the site has 982 listing pages in total, so widen
    # the range below to fetch more of them.
    for page in range(1, 2):
        url = "http://www.quanshuwang.com/list/1_%s.html" % page
        qingqiu(url)
    # The other categories each have their own duplicated entry point;
    # enable any of these to crawl them as well:
    # url_q = "http://www.quanshuwang.com/list/2_1.html"
    # qingqiu2(url_q)
    # url_w = "http://www.quanshuwang.com/list/3_1.html"
    # qingqiu3(url_w)
    # url_e = "http://www.quanshuwang.com/list/4_1.html"
    # qingqiu4(url_e)
    # url_e = "http://www.quanshuwang.com/list/5_1.html"
    # qingqiu5(url_e)
    # url_r = "http://www.quanshuwang.com/list/6_1.html"
    # qingqiu6(url_r)
    # url_t = "http://www.quanshuwang.com/list/7_1.html"
    # qingqiu7(url_t)
    # url_y = "http://www.quanshuwang.com/list/8_1.html"
    # qingqiu8(url_y)
    # url_u = "http://www.quanshuwang.com/list/9_1.html"
    # qingqiu9(url_u)
    # url_i = "http://www.quanshuwang.com/list/10_1.html"
    # qingqiu10(url_i)
    # url_o = "http://www.quanshuwang.com/list/11_1.html"
    # qingqiu11(url_o)
    # url_p = "http://www.quanshuwang.com/list/12_1.html"
    # qingqiu12(url_p)
# --- The lines below are unrelated "related reading" blurbs captured when the
# --- script was scraped from a blog page; commented out (content preserved)
# --- so the module stays importable.
# 相关阅读
# ###这是一篇求助文,我能获取图片并变成字符串,但是无法获取位移量### 前两坛突发奇想想要爬取自如网的租房数据,本来以为能够请求+美
# [摘要]虽然人工智能正在不断地发展,但是它还没有发展到具备“语言加工能力”的程度;没有人类编写的程序,它是绝对做不到自己创作的。
# (1)使用技术 python 3.5.2、requests 、BeautifulSoup (2)背景介绍 爬取的网站http://tu.duowan.com/tag/5037.html 需要爬取的内容
# # coding=utf-8import requestsr = requests.get("https://www.baidu.com/img/bd_logo1.png")with open("baidu.png","wb")
# 在前几年,穿越系列的电视剧或电影引起了不少的话题。当今各系列题材的影视剧的热播也同样成为人们茶余饭后的一个讨论话题,而在这些