import requests  # third-party: install first with "pip install requests"
import re
from bs4 import BeautifulSoup #需要pip install bs4 或者 pip3 install bs4先下载

# Chapter pages follow the pattern "https://www.XXX&chapterid=1".
#
# Both header values below are copied from the browser after logging in:
# right-click the page -> "Inspect" -> reload -> Network tab -> request headers.
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
cookie = "balabala"

headers = {
    "User-Agent": user_agent,
    "Cookie": cookie,
}

# Seconds to wait for the server before giving up. Without a timeout,
# requests.get() can block indefinitely on a stalled connection.
request_timeout = 10

# Download each chapter page and append its title and text to tanxulin.txt.
for chapter in range(1, 2, 1):  # half-open interval [1, 2), step 1
    content = requests.get(
        f"https://www.XXX&chapterid={chapter}",
        headers=headers,
        timeout=request_timeout,
    )
    # Fail loudly on 4xx/5xx instead of parsing an error page as a chapter.
    content.raise_for_status()
    # Override the detected encoding so .text is decoded as GBK.
    content.encoding = "GBK"
    html = content.text

    soup = BeautifulSoup(html, "html.parser")
    title = soup.find("h2")
    txt = soup.find("div", attrs={"onselectstart": "return false"})
    if title is None or txt is None:
        # find() returns None when nothing matches; stop with a clear message
        # rather than an AttributeError/TypeError further down.
        raise RuntimeError(
            f"chapter {chapter}: <h2> title or content <div> not found in the page"
        )

    # get_text() also works when the <h2> contains nested tags, where
    # .string would be None and the literal text "None" would be written.
    # U+00A0 (what "&nbsp;" parses to) cannot be encoded as GBK, so it is
    # turned into a plain space before writing.
    heading = title.get_text().replace("\xa0", " ")

    # errors="replace" keeps one stray non-GBK character from aborting the
    # whole run with UnicodeEncodeError.
    with open("tanxulin.txt", "a", encoding="GBK", errors="replace") as f:
        # End the heading with a newline so it does not run into the text.
        f.write(f"第 {chapter}{heading}\n")
        for txt_content in txt:
            # .string is None for children without a single text node
            # (e.g. <br/>); those carry no text and are skipped.
            readling = txt_content.string
            if readling is not None:
                f.write(readling.replace("\xa0", " ") + "\n")
Logo

魔乐社区(Modelers.cn) 是一个中立、公益的人工智能社区,提供人工智能工具、模型、数据的托管、展示与应用协同服务,为人工智能开发及爱好者搭建开放的学习交流平台。社区通过理事会方式运作,由全产业链共同建设、共同运营、共同享有,推动国产AI生态繁荣发展。

更多推荐