切换主题
三、面向对象-分页爬取百度贴吧
代码:
python
import os
import time

import requests
from fake_useragent import UserAgent
class Tieba:
    """Paginated crawler that fetches Baidu Tieba search-result pages and
    saves each page's HTML under ./html/."""

    def __init__(self):
        # Base search URL; query parameters are supplied per request.
        self.url = 'https://tieba.baidu.com/f?'
        # Random User-Agent to reduce the chance of being blocked.
        self.headers = {
            'User-Agent': UserAgent().random
        }
        self.keyword = ""  # search keyword, set interactively in run()
        self.pages = 0     # number of result pages to fetch, set in run()

    # Send the request
    def send(self, params):
        """Fetch one result page and return its HTML text.

        params: dict with 'kw' (keyword) and 'pn' (result offset).
        """
        # BUG FIX: the original never sent self.headers, so the random
        # User-Agent built in __init__ was unused.  A timeout is added so
        # a hung connection cannot block the crawl forever.
        response = requests.get(url=self.url, params=params,
                                headers=self.headers, timeout=10)
        return response.text

    # Save the data
    def save(self, response):
        """Write one page of HTML to ./html/, named keyword + ms timestamp."""
        # Robustness fix: the original raised FileNotFoundError when the
        # ./html directory did not already exist.
        os.makedirs("./html", exist_ok=True)
        with open(f"./html/{self.keyword}{int(time.time() * 1000)}.html", "w", encoding='utf-8') as fs:
            fs.write(response)

    # Run
    def run(self):
        """Prompt for a keyword and page count, then fetch and save each page."""
        self.keyword = input("请输入搜索的关键词:")
        self.pages = int(input("请输入搜索总页数:"))
        for page in range(self.pages):
            params = {
                'kw': self.keyword,
                'pn': page * 50  # Tieba paginates in steps of 50 posts
            }
            response = self.send(params=params)
            self.save(response)
# Guard the entry point so importing this module does not immediately
# start an interactive network crawl (the original ran unconditionally).
if __name__ == "__main__":
    tieBa = Tieba()
    tieBa.run()
    print("爬取成功")