from urllib.parse import urlencode

import requests

from pyquery import PyQuery as pq

base_url = 'https://m.weibo.cn/api/container/getIndex?'

headers = {

'Host': 'm.weibo.cn',

'Referer': 'https://m.weibo.cn/u/2830678474',

'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',

'X-Requested-With': 'XMLHttpRequest',

}

def get_page(page):

params = {

'type': 'uid',

'value': '2830678474',

'containerid': '1076032830678474',

'page': page

}

url = base_url + urlencode(params)

try:

response = requests.get(url, headers=headers)

if response.status_code == 200:

return response.json()

except requests.ConnectionError as e:

print('Error', e.args)

#

def parse_page(json):

if json:

items = json.get('data').get('cards')

for item in items:

item = item.get('mblog')

weibo = {}

weibo['id'] = item.get('id')

weibo['正文内容'] = pq(item.get('text')).text()

weibo['赞数'] = item.get('attitudes_count')

weibo['多少条评论'] = item.get('comments_count')

weibo['转发次数'] = item.get('reposts_count')

yield weibo

if __name__ == '__main__':

for page in range(1, 11):

json = get_page(page)

results = parse_page(json)

for result in results:

print(result)

一键复制

编辑

Web IDE

原始数据

按行查看

历史

Logo

魔乐社区(Modelers.cn) 是一个中立、公益的人工智能社区,提供人工智能工具、模型、数据的托管、展示与应用协同服务,为人工智能开发及爱好者搭建开放的学习交流平台。社区通过理事会方式运作,由全产业链共同建设、共同运营、共同享有,推动国产AI生态繁荣发展。

更多推荐