Skip to content

三、面向对象-分页爬取百度贴吧

代码:

python
from fake_useragent import UserAgent
import requests
import time


class Tieba:
    """Paginated crawler for Baidu Tieba search-result pages.

    Prompts for a keyword and a page count, fetches each result page,
    and saves the raw HTML under ./html/ with a millisecond-timestamp
    filename.
    """

    def __init__(self):
        # Base search URL; query parameters are appended per request.
        self.url = 'https://tieba.baidu.com/f?'
        # Random User-Agent to reduce the chance of being blocked.
        self.headers = {
            'User-Agent': UserAgent().random
        }
        self.keyword = ""  # search keyword entered by the user
        self.pages = 0     # total number of pages to crawl

    # Send the request
    def send(self, params):
        """Fetch one result page and return its HTML text.

        Args:
            params: dict of query parameters ('kw' keyword, 'pn' offset).

        Returns:
            The response body as text.
        """
        # Bug fix: the original built self.headers but never sent it, so
        # requests' default User-Agent was used and the randomized UA was
        # dead code. Also add a timeout so a stalled connection cannot
        # hang the crawl indefinitely.
        response = requests.get(url=self.url, params=params,
                                headers=self.headers, timeout=10)
        return response.text

    # Save the data
    def save(self, response):
        """Write one page of HTML to ./html/<keyword><ms-timestamp>.html.

        Args:
            response: the HTML text of one fetched page.
        """
        import os
        # Robustness: the original crashed with FileNotFoundError when
        # ./html/ did not exist yet.
        os.makedirs("./html", exist_ok=True)
        # Millisecond timestamp keeps filenames unique across pages.
        with open(f"./html/{self.keyword}{int(time.time() * 1000)}.html", "w", encoding='utf-8') as fs:
            fs.write(response)

    # Run the crawl
    def run(self):
        """Prompt for keyword and page count, then fetch and save each page."""
        self.keyword = input("请输入搜索的关键词:")
        self.pages = int(input("请输入搜索总页数:"))
        for page in range(self.pages):
            params = {
                'kw': self.keyword,
                # Tieba paginates in steps of 50 posts per page.
                'pn': page * 50
            }
            response = self.send(params=params)
            self.save(response)


# Entry point: guard so importing this module does not prompt for input
# and start crawling as a side effect.
if __name__ == "__main__":
    tieBa = Tieba()
    tieBa.run()

    print("爬取成功")