import time, json, csv, requests
class WeChat:
    """Collect the article entries published by one WeChat official account.

    Articles are accumulated in ``self.articles`` as the dicts decoded from
    the ``general_msg_list`` payload returned by the ``profile_ext`` endpoint.
    """

    def __init__(self, biz, key):
        """Store the credentials and create the shared HTTP session.

        Args:
            biz: ``__biz`` identifier of the official account to crawl.
            key: request key; it keeps changing, so it is supplied per run.
        """
        self.biz = biz  # the official account to crawl
        self.key = key  # the key keeps changing
        # NOTE(review): YOUR_UIN / YOUR_PASS_TICKET are not defined in the
        # visible source -- presumably placeholders to fill in before running.
        self.uin = YOUR_UIN
        self.pass_ticket = YOUR_PASS_TICKET
        self.headers = {'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6)'}
        self.session = requests.Session()
        self.articles = []

    def prepare_for_load(self):
        """Send the credentialed ``getmasssendmsg`` request on the shared session.

        The response body is not used; presumably the call exists so the
        session picks up the cookies that later requests need -- TODO confirm.
        """
        url = 'https://mp.weixin.qq.com/mp/getmasssendmsg'
        params = {
            '__biz': self.biz,
            'uin': self.uin,
            'key': self.key,
            'pass_ticket': self.pass_ticket,
        }
        # NOTE(review): verify=False turns off TLS certificate verification.
        self.session.get(url, params=params, headers=self.headers,
                         allow_redirects=True, verify=False)

    def load_articles(self, offset=0, delay=1):
        """Fetch message pages starting at ``offset`` and collect their articles.

        Pages are requested one after another (an iterative equivalent of
        simulating pull-to-refresh) until the server reports that no more
        messages are available.

        Args:
            offset: paging offset of the first page to request.
            delay: seconds to sleep after each request so the traffic looks
                like a real user; defaults to the original one second.
        """
        url = 'https://mp.weixin.qq.com/mp/profile_ext'
        while True:
            params = {
                'action': 'getmsg',
                '__biz': self.biz,
                'f': 'json',
                'offset': offset,
                'count': 10,
            }
            # NOTE(review): verify=False turns off TLS certificate verification.
            r = self.session.get(url, params=params, headers=self.headers,
                                 allow_redirects=True, verify=False)
            time.sleep(delay)  # make WeChat think this is a real user
            d = r.json()
            cards = self._parse_cards(d.get('general_msg_list'))
            self.articles.extend(self._extract_articles(cards))
            next_offset = d.get('next_offset')
            # Stop when the server says so, or when the offset cannot advance
            # (otherwise the same page would be requested forever).
            if (not d.get('can_msg_continue')
                    or next_offset is None
                    or next_offset == offset):
                break
            offset = next_offset

    @staticmethod
    def _parse_cards(general_msg_list):
        """Decode the JSON string ``general_msg_list`` into its list of cards.

        Returns an empty list when the value is missing, is not valid JSON,
        or does not contain a ``list`` entry.
        """
        try:
            decoded = json.loads(general_msg_list)
        except (TypeError, ValueError):
            # TypeError: value was None / not a string; ValueError: bad JSON.
            return []
        if not isinstance(decoded, dict):
            return []
        return decoded.get('list') or []

    @staticmethod
    def _extract_articles(cards):
        """Return the article dicts contained in ``cards``, in order.

        Only cards whose ``comm_msg_info.type`` is 49 (image-and-text
        messages) are considered. For each, the main ``app_msg_ext_info``
        entry comes first, followed by the items of its
        ``multi_app_msg_item_list`` when ``is_multi`` is set.
        """
        articles = []
        for card in cards:
            if (card.get('comm_msg_info') or {}).get('type') != 49:
                continue
            app_msg_ext_info = card.get('app_msg_ext_info')
            if not app_msg_ext_info:
                continue
            articles.append(app_msg_ext_info)
            if app_msg_ext_info.get('is_multi'):
                articles.extend(app_msg_ext_info.get('multi_app_msg_item_list') or [])
        return articles
# Crawl the configured account, then write one "title, content_url" row per
# collected article to wechat.csv.
wechat = WeChat(biz=YOUR_BIZ, key=YOUR_KEY)
wechat.prepare_for_load()
wechat.load_articles()
# newline='' is what the csv module expects so it can control line endings itself.
with open('wechat.csv', 'wt', encoding='utf-8', newline='') as csvFile:
    writer = csv.writer(csvFile)  # build the writer once, not once per row
    for a in wechat.articles:
        if not a:
            continue  # skip messages that carried no article payload
        # Articles are dicts decoded from JSON, so fields are read by key.
        writer.writerow([a.get('title'), a.get('content_url')])