
Scraping novel data from 全书网 (quanshuwang.com): a site-wide Python crawler with code for 11 category tags

Posted: 2019-08-12 21:15:50 · Source: IT技术 · Author: seo实验室小编 · Reads: 84
 

Target site: 全书网 (www.quanshuwang.com)

Crawling workflow:

Step 1: (screenshot)

Step 2: (screenshot)

Step 3: (screenshot)

Step 4: (screenshot)

Step 5: (screenshot)

The code:

import requests
from lxml import etree
import urllib.parse

header = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36",
}
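Every function below repeats the same fetch-and-decode boilerplate (requests.get, GBK decoding, status print, etree.HTML). A small helper could centralize it; this is only a sketch, not part of the original script, and the timeout is an addition for robustness:

# Sketch only (not in the original post): shared fetch helper.
def fetch(url):
    """Fetch a page with the shared headers, decode it as GBK, and return (tree, final URL)."""
    response = requests.get(url, headers=header, timeout=10)  # timeout added for robustness
    response.encoding = 'gbk'
    print(response.status_code)
    return etree.HTML(response.text), response.url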
# Fetch a category list page and parse each book's link, title, author, blurb, etc.
def qingqiu(url):
    response = requests.get(url,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//ul[@class="seeWell cf"]/li')
    for i in b:
        # link to the book's detail page
        jiexi = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url,jiexi)
        jiexi_title = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@title')
        jiexi_zz = i.xpath('.//span[@class="l"]/a[2]/text()')
        jiexi_nr = i.xpath('.//span[@class="l"]/em[@class="c999 clearfix"]/text()')
        print(pinjie,jiexi_title,jiexi_zz,jiexi_nr)
        yuedxq(pinjie)  # pass the joined absolute URL rather than the raw href
# Request the detail page from the previous step and extract the "start reading" link
def yuedxq(url):
    response = requests.get(url,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//p[@class="detail"]/p[@class="b-info"]')
    for i in b:
        jiexi = i.xpath('.//p[@class="b-oper"]/a[@class="reader"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url,jiexi)
        print(pinjie)
        hqzangj(pinjie)
# Follow that link and collect the chapter titles and chapter links
def hqzangj(url):
    response = requests.get(url,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//p[@class="clearfix dirconone"]/li')
    for i in b:
        title = i.xpath('./a/@title')
        jiexi = i.xpath('./a/@href')[0]
        pinjie = urllib.parse.urljoin(response.url,jiexi)
        print(title,pinjie)
        readxq(pinjie)
# Open each chapter link and parse the chapter body and title
def readxq(url):
    response = requests.get(url,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//p[@class="mainContenr"]/text()')
    c = a.xpath('//strong[@class="l jieqi_title"]/text()')
    print(c,b)
    # iterate over the chapter's text fragments
    for i in b:
        writes(i,c)
# Append each text fragment to a .txt file named after the chapter title
def writes(t,c):
    with open('{}.txt'.format(c),'a+') as f:
        f.write(t)
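One quirk worth noting: `c` comes straight from `xpath()`, so it is a list, and the output file ends up named like "['第一章 ...'].txt". A hedged sketch of a safer variant (not from the original post) that takes the first title and strips characters that are illegal in filenames:

# Sketch only: index the first title and sanitize it before using it as a filename.
import re

def writes_safe(t, c):
    title = c[0] if c else 'untitled'                      # xpath() returns a list
    title = re.sub(r'[\\/:*?"<>|]', '_', title).strip()    # drop characters Windows forbids
    with open('{}.txt'.format(title), 'a+', encoding='utf-8') as f:
        f.write(t)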

print("-------------------------------------------------------------------------------")






# Second category tag: wuxia / xiuzhen (武侠修真)
def qingqiu2(url_q):
    response = requests.get(url_q,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//ul[@class="seeWell cf"]/li')
    for i in b:
        # link to the book's detail page
        jiexi = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url,jiexi)
        jiexi_title = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@title')
        jiexi_zz = i.xpath('.//span[@class="l"]/a[2]/text()')
        jiexi_nr = i.xpath('.//span[@class="l"]/em[@class="c999 clearfix"]/text()')
        print(pinjie,jiexi_title,jiexi_zz,jiexi_nr)
        yuedxq2(pinjie)  # pass the joined absolute URL
def yuedxq2(url):
    response = requests.get(url,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//p[@class="detail"]/p[@class="b-info"]')
    for i in b:
        jiexi = i.xpath('.//p[@class="b-oper"]/a[@class="reader"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url,jiexi)
        print(pinjie)
        hqzangj2(pinjie)
def hqzangj2(url):
    response = requests.get(url,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//p[@class="clearfix dirconone"]/li')
    for i in b:
        title = i.xpath('./a/@title')
        jiexi = i.xpath('./a/@href')[0]
        pinjie = urllib.parse.urljoin(response.url,jiexi)
        print(title,pinjie)
        readxq2(pinjie)
def readxq2(url):
    response = requests.get(url,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//p[@class="mainContenr"]/text()')
    c = a.xpath('//strong[@class="l jieqi_title"]/text()')
    print(c,b)
    for i in b:
        writes2(i,c)
def writes2(t,c):
    with open('{}.txt'.format(c),'a+') as f:
        f.write(t)
print("-------------------------------------------------------------------------------")



def qingqiu3(url_w):
    response = requests.get(url_w,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//ul[@class="seeWell cf"]/li')
    for i in b:
        # link to the book's detail page
        jiexi = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url,jiexi)
        jiexi_title = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@title')
        jiexi_zz = i.xpath('.//span[@class="l"]/a[2]/text()')
        jiexi_nr = i.xpath('.//span[@class="l"]/em[@class="c999 clearfix"]/text()')
        print(pinjie,jiexi_title,jiexi_zz,jiexi_nr)
        yuedxq3(pinjie)  # pass the joined absolute URL
def yuedxq3(url):
    response = requests.get(url,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//p[@class="detail"]/p[@class="b-info"]')
    for i in b:
        jiexi = i.xpath('.//p[@class="b-oper"]/a[@class="reader"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url,jiexi)
        print(pinjie)
        hqzangj3(pinjie)
def hqzangj3(url):
    response = requests.get(url,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//p[@class="clearfix dirconone"]/li')
    for i in b:
        title = i.xpath('./a/@title')
        jiexi = i.xpath('./a/@href')[0]
        pinjie = urllib.parse.urljoin(response.url,jiexi)
        print(title,pinjie)
        readxq3(pinjie)
def readxq3(url):
    response = requests.get(url,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//p[@class="mainContenr"]/text()')
    c = a.xpath('//strong[@class="l jieqi_title"]/text()')
    print(c,b)
    for i in b:
        writes3(i,c)
def writes3(t,c):
    with open('{}.txt'.format(c),'a+') as f:
        f.write(t)

print("-------------------------------------------------------------------------------")        






def qingqiu4(url_e):
    response = requests.get(url_e,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//ul[@class="seeWell cf"]/li')
    for i in b:
        # link to the book's detail page
        jiexi = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url,jiexi)
        jiexi_title = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@title')
        jiexi_zz = i.xpath('.//span[@class="l"]/a[2]/text()')
        jiexi_nr = i.xpath('.//span[@class="l"]/em[@class="c999 clearfix"]/text()')
        print(pinjie,jiexi_title,jiexi_zz,jiexi_nr)
        yuedxq4(pinjie)  # pass the joined absolute URL
def yuedxq4(url):
    response = requests.get(url,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//p[@class="detail"]/p[@class="b-info"]')
    for i in b:
        jiexi = i.xpath('.//p[@class="b-oper"]/a[@class="reader"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url,jiexi)
        print(pinjie)
        hqzangj4(pinjie)
def hqzangj4(url):
    response = requests.get(url,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//p[@class="clearfix dirconone"]/li')
    for i in b:
        title = i.xpath('./a/@title')
        jiexi = i.xpath('./a/@href')[0]
        pinjie = urllib.parse.urljoin(response.url,jiexi)
        print(title,pinjie)
        readxq4(pinjie)
def readxq4(url):
    response = requests.get(url,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//p[@class="mainContenr"]/text()')
    c = a.xpath('//strong[@class="l jieqi_title"]/text()')
    print(c,b)
    for i in b:
        writes4(i,c)
def writes4(t,c):
    with open('{}.txt'.format(c),'a+') as f:
        f.write(t)

print("-------------------------------------------------------------------------------")



def qingqiu5(url_e):
    response = requests.get(url_e,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//ul[@class="seeWell cf"]/li')
    for i in b:
        # link to the book's detail page
        jiexi = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url,jiexi)
        jiexi_title = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@title')
        jiexi_zz = i.xpath('.//span[@class="l"]/a[2]/text()')
        jiexi_nr = i.xpath('.//span[@class="l"]/em[@class="c999 clearfix"]/text()')
        print(pinjie,jiexi_title,jiexi_zz,jiexi_nr)
        yuedxq5(pinjie)  # pass the joined absolute URL
def yuedxq5(url):
    response = requests.get(url,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//p[@class="detail"]/p[@class="b-info"]')
    for i in b:
        jiexi = i.xpath('.//p[@class="b-oper"]/a[@class="reader"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url,jiexi)
        print(pinjie)
        hqzangj5(pinjie)
def hqzangj5(url):
    response = requests.get(url,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//p[@class="clearfix dirconone"]/li')
    for i in b:
        title = i.xpath('./a/@title')
        jiexi = i.xpath('./a/@href')[0]
        pinjie = urllib.parse.urljoin(response.url,jiexi)
        print(title,pinjie)
        readxq5(pinjie)
def readxq5(url):
    response = requests.get(url,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//p[@class="mainContenr"]/text()')
    c = a.xpath('//strong[@class="l jieqi_title"]/text()')
    print(c,b)
    for i in b:
        writes5(i,c)
def writes5(t,c):
    with open('{}.txt'.format(c),'a+') as f:
        f.write(t)

print("-------------------------------------------------------------------------------")



def qingqiu6(url_r):
    response = requests.get(url_r,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//ul[@class="seeWell cf"]/li')
    for i in b:
        # link to the book's detail page
        jiexi = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url,jiexi)
        jiexi_title = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@title')
        jiexi_zz = i.xpath('.//span[@class="l"]/a[2]/text()')
        jiexi_nr = i.xpath('.//span[@class="l"]/em[@class="c999 clearfix"]/text()')
        print(pinjie,jiexi_title,jiexi_zz,jiexi_nr)
        yuedxq6(pinjie)  # pass the joined absolute URL
def yuedxq6(url):
    response = requests.get(url,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//p[@class="detail"]/p[@class="b-info"]')
    for i in b:
        jiexi = i.xpath('.//p[@class="b-oper"]/a[@class="reader"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url,jiexi)
        print(pinjie)
        hqzangj6(pinjie)
def hqzangj6(url):
    response = requests.get(url,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//p[@class="clearfix dirconone"]/li')
    for i in b:
        title = i.xpath('./a/@title')
        jiexi = i.xpath('./a/@href')[0]
        pinjie = urllib.parse.urljoin(response.url,jiexi)
        print(title,pinjie)
        readxq6(pinjie)
def readxq6(url):
    response = requests.get(url,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//p[@class="mainContenr"]/text()')
    c = a.xpath('//strong[@class="l jieqi_title"]/text()')
    print(c,b)
    for i in b:
        writes6(i,c)
def writes6(t,c):
    with open('{}.txt'.format(c),'a+') as f:
        f.write(t)

print("-------------------------------------------------------------------------------")



def qingqiu7(url_t):
    response = requests.get(url_t,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//ul[@class="seeWell cf"]/li')
    for i in b:
        # link to the book's detail page
        jiexi = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url,jiexi)
        jiexi_title = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@title')
        jiexi_zz = i.xpath('.//span[@class="l"]/a[2]/text()')
        jiexi_nr = i.xpath('.//span[@class="l"]/em[@class="c999 clearfix"]/text()')
        print(pinjie,jiexi_title,jiexi_zz,jiexi_nr)
        yuedxq7(pinjie)  # pass the joined absolute URL
def yuedxq7(url):
    response = requests.get(url,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//p[@class="detail"]/p[@class="b-info"]')
    for i in b:
        jiexi = i.xpath('.//p[@class="b-oper"]/a[@class="reader"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url,jiexi)
        print(pinjie)
        hqzangj7(pinjie)
def hqzangj7(url):
    response = requests.get(url,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//p[@class="clearfix dirconone"]/li')
    for i in b:
        title = i.xpath('./a/@title')
        jiexi = i.xpath('./a/@href')[0]
        pinjie = urllib.parse.urljoin(response.url,jiexi)
        print(title,pinjie)
        readxq7(pinjie)
def readxq7(url):
    response = requests.get(url,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//p[@class="mainContenr"]/text()')
    c = a.xpath('//strong[@class="l jieqi_title"]/text()')
    print(c,b)
    for i in b:
        writes7(i,c)
def writes7(t,c):
    with open('{}.txt'.format(c),'a+') as f:
        f.write(t)

print("-------------------------------------------------------------------------------")



def qingqiu8(url_y):
    response = requests.get(url_y,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//ul[@class="seeWell cf"]/li')
    for i in b:
        # link to the book's detail page
        jiexi = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url,jiexi)
        jiexi_title = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@title')
        jiexi_zz = i.xpath('.//span[@class="l"]/a[2]/text()')
        jiexi_nr = i.xpath('.//span[@class="l"]/em[@class="c999 clearfix"]/text()')
        print(pinjie,jiexi_title,jiexi_zz,jiexi_nr)
        yuedxq8(pinjie)  # pass the joined absolute URL
def yuedxq8(url):
    response = requests.get(url,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//p[@class="detail"]/p[@class="b-info"]')
    for i in b:
        jiexi = i.xpath('.//p[@class="b-oper"]/a[@class="reader"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url,jiexi)
        print(pinjie)
        hqzangj8(pinjie)
def hqzangj8(url):
    response = requests.get(url,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//p[@class="clearfix dirconone"]/li')
    for i in b:
        title = i.xpath('./a/@title')
        jiexi = i.xpath('./a/@href')[0]
        pinjie = urllib.parse.urljoin(response.url,jiexi)
        print(title,pinjie)
        readxq8(pinjie)
def readxq8(url):
    response = requests.get(url,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//p[@class="mainContenr"]/text()')
    c = a.xpath('//strong[@class="l jieqi_title"]/text()')
    print(c,b)
    for i in b:
        writes8(i,c)
def writes8(t,c):
    with open('{}.txt'.format(c),'a+') as f:
        f.write(t)

print("-------------------------------------------------------------------------------")

def qingqiu9(url_u):
    response = requests.get(url_u,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//ul[@class="seeWell cf"]/li')
    for i in b:
        # link to the book's detail page
        jiexi = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url,jiexi)
        jiexi_title = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@title')
        jiexi_zz = i.xpath('.//span[@class="l"]/a[2]/text()')
        jiexi_nr = i.xpath('.//span[@class="l"]/em[@class="c999 clearfix"]/text()')
        print(pinjie,jiexi_title,jiexi_zz,jiexi_nr)
        yuedxq9(pinjie)  # pass the joined absolute URL
def yuedxq9(url):
    response = requests.get(url,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//p[@class="detail"]/p[@class="b-info"]')
    for i in b:
        jiexi = i.xpath('.//p[@class="b-oper"]/a[@class="reader"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url,jiexi)
        print(pinjie)
        hqzangj9(pinjie)
def hqzangj9(url):
    response = requests.get(url,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//p[@class="clearfix dirconone"]/li')
    for i in b:
        title = i.xpath('./a/@title')
        jiexi = i.xpath('./a/@href')[0]
        pinjie = urllib.parse.urljoin(response.url,jiexi)
        print(title,pinjie)
        readxq9(pinjie)
def readxq9(url):
    response = requests.get(url,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//p[@class="mainContenr"]/text()')
    c = a.xpath('//strong[@class="l jieqi_title"]/text()')
    print(c,b)
    for i in b:
        writes9(i,c)
def writes9(t,c):
    with open('{}.txt'.format(c),'a+') as f:
        f.write(t)

print("-------------------------------------------------------------------------------")



def qingqiu10(url_i):
    response = requests.get(url_i,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//ul[@class="seeWell cf"]/li')
    for i in b:
        # link to the book's detail page
        jiexi = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url,jiexi)
        jiexi_title = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@title')
        jiexi_zz = i.xpath('.//span[@class="l"]/a[2]/text()')
        jiexi_nr = i.xpath('.//span[@class="l"]/em[@class="c999 clearfix"]/text()')
        print(pinjie,jiexi_title,jiexi_zz,jiexi_nr)
        yuedxq10(pinjie)  # pass the joined absolute URL
def yuedxq10(url):
    response = requests.get(url,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//p[@class="detail"]/p[@class="b-info"]')
    for i in b:
        jiexi = i.xpath('.//p[@class="b-oper"]/a[@class="reader"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url,jiexi)
        print(pinjie)
        hqzangj10(pinjie)
def hqzangj10(url):
    response = requests.get(url,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//p[@class="clearfix dirconone"]/li')
    for i in b:
        title = i.xpath('./a/@title')
        jiexi = i.xpath('./a/@href')[0]
        pinjie = urllib.parse.urljoin(response.url,jiexi)
        print(title,pinjie)
        readxq10(pinjie)
def readxq10(url):
    response = requests.get(url,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//p[@class="mainContenr"]/text()')
    c = a.xpath('//strong[@class="l jieqi_title"]/text()')
    print(c,b)
    for i in b:
        writes10(i,c)
def writes10(t,c):
    with open('{}.txt'.format(c),'a+') as f:
        f.write(t)

print("-------------------------------------------------------------------------------")

def qingqiu11(url_o):
    response = requests.get(url_o,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//ul[@class="seeWell cf"]/li')
    for i in b:
        # link to the book's detail page
        jiexi = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url,jiexi)
        jiexi_title = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@title')
        jiexi_zz = i.xpath('.//span[@class="l"]/a[2]/text()')
        jiexi_nr = i.xpath('.//span[@class="l"]/em[@class="c999 clearfix"]/text()')
        print(pinjie,jiexi_title,jiexi_zz,jiexi_nr)
        yuedxq11(pinjie)  # pass the joined absolute URL
def yuedxq11(url):
    response = requests.get(url,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//p[@class="detail"]/p[@class="b-info"]')
    for i in b:
        jiexi = i.xpath('.//p[@class="b-oper"]/a[@class="reader"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url,jiexi)
        print(pinjie)
        hqzangj11(pinjie)
def hqzangj11(url):
    response = requests.get(url,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//p[@class="clearfix dirconone"]/li')
    for i in b:
        title = i.xpath('./a/@title')
        jiexi = i.xpath('./a/@href')[0]
        pinjie = urllib.parse.urljoin(response.url,jiexi)
        print(title,pinjie)
        readxq11(pinjie)
def readxq11(url):
    response = requests.get(url,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//p[@class="mainContenr"]/text()')
    c = a.xpath('//strong[@class="l jieqi_title"]/text()')
    print(c,b)
    for i in b:
        writes11(i,c)
def writes11(t,c):
    with open('{}.txt'.format(c),'a+') as f:
        f.write(t)

print("-------------------------------------------------------------------------------")



def qingqiu12(url_p):
    response = requests.get(url_p,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//ul[@class="seeWell cf"]/li')
    for i in b:
        # link to the book's detail page
        jiexi = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url,jiexi)
        jiexi_title = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@title')
        jiexi_zz = i.xpath('.//span[@class="l"]/a[2]/text()')
        jiexi_nr = i.xpath('.//span[@class="l"]/em[@class="c999 clearfix"]/text()')
        print(pinjie,jiexi_title,jiexi_zz,jiexi_nr)
        yuedxq12(pinjie)  # pass the joined absolute URL
def yuedxq12(url):
    response = requests.get(url,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//p[@class="detail"]/p[@class="b-info"]')
    for i in b:
        jiexi = i.xpath('.//p[@class="b-oper"]/a[@class="reader"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url,jiexi)
        print(pinjie)
        jinru(pinjie)
def jinru(url):
    response = requests.get(url,headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a= etree.HTML(response.text)
    b = a.xpath('//p[@class="item"]/ul')
    for i in b:
        jiexi = i.xpath('.//li/a[@class="orange"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url,jiexi)
        print(pinjie)
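Note that jinru stops after printing the joined chapter links. If those chapter pages share the layout assumed by readxq (an assumption; this twelfth tag clearly uses a different listing structure), the links could be handed straight to the existing chain. A minimal sketch, not part of the original script:

# Sketch only: continue from the chapter links instead of just printing them,
# assuming the chapter pages match the layout readxq expects.
def jinru_and_read(url):
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    a = etree.HTML(response.text)
    for ul in a.xpath('//p[@class="item"]/ul'):
        href = ul.xpath('.//li/a[@class="orange"]/@href')
        if href:
            readxq(urllib.parse.urljoin(response.url, href[0]))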




        
   





if __name__ == '__main__':
    for i in range(1,2): # 982 pages in total; only page 1 is crawled here
        url = "http://www.quanshuwang.com/list/1_%s.html"%str(i)
        qingqiu(url)
    # url_q = "http://www.quanshuwang.com/list/2_1.html"
    # qingqiu2(url_q)
    # url_w = "http://www.quanshuwang.com/list/3_1.html"
    # qingqiu3(url_w)
    # url_e = "http://www.quanshuwang.com/list/4_1.html"
    # qingqiu4(url_e)
    # url_e = "http://www.quanshuwang.com/list/5_1.html"
    # qingqiu5(url_e)
    # url_r = "http://www.quanshuwang.com/list/6_1.html"
    # qingqiu6(url_r)
    # url_t = "http://www.quanshuwang.com/list/7_1.html"
    # qingqiu7(url_t)
    # url_y = "http://www.quanshuwang.com/list/8_1.html"
    # qingqiu8(url_y)
    # url_u = "http://www.quanshuwang.com/list/9_1.html"
    # qingqiu9(url_u)
    # url_i = "http://www.quanshuwang.com/list/10_1.html"
    # qingqiu10(url_i)
    # url_o = "http://www.quanshuwang.com/list/11_1.html"
    # qingqiu11(url_o)
    # url_p = "http://www.quanshuwang.com/list/12_1.html"
    # qingqiu12(url_p)
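Since the eleven category list pages (list/1_*.html through list/11_*.html) evidently share the same markup — the function families above are identical apart from their names — the whole script could be driven by one parameterized entry point that reuses the first family for every tag. A minimal sketch under that assumption:

# Sketch only (not in the original): one entry point for all category tags.
def crawl_tag(tag_id, pages=1):
    """Crawl the first `pages` list pages of one category tag, reusing qingqiu()."""
    for page in range(1, pages + 1):
        url = "http://www.quanshuwang.com/list/%d_%d.html" % (tag_id, page)
        qingqiu(url)

# usage: crawl the first page of every tag
# for tag_id in range(1, 12):
#     crawl_tag(tag_id)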
