Automated Scraping of WeChat Official Account Articles Using Playwright with Auto-Scrolling
This article demonstrates how to use Playwright with automatic scrolling to scrape all historical article titles and links from a WeChat Official Account. The code is provided for educational purposes only.
import re
from playwright.sync_api import sync_playwright
def _count_scroll_rounds(page):
    """Return how many scroll passes are needed, based on the album's item count.

    Reads the first run of digits from the album description element and
    floor-divides it by 10. Returns 0 when the element is missing or its text
    contains no digits, so the caller can still report whatever is visible.
    """
    count_element = page.query_selector('.album__desc-content.js_album_desc_content > span')
    if count_element is None:
        # query_selector yields None when nothing matches the selector.
        return 0

    match = re.search(r'\d+', count_element.inner_text())
    if match is None:
        return 0

    # NOTE(review): the divisor 10 presumably reflects the number of items the
    # album loads per scroll -- confirm against the live page.
    return int(match.group()) // 10


def scrape_wechat_articles(url=None, scroll_pause_ms=2000):
    """Print the title and link of every article listed on a WeChat album page.

    Opens the album in Chromium, scrolls to the bottom repeatedly so further
    items are loaded, then prints the ``data-title`` and ``data-link``
    attributes of each list item.

    Args:
        url: Album page to scrape. Defaults to the sample album used in this
            article when omitted.
        scroll_pause_ms: Milliseconds to wait after each scroll so newly
            requested items have time to appear.

    Returns:
        None. Results are written to standard output.
    """
    if url is None:
        url = "https://mp.weixin.qq.com/mp/appmsgalbum?__biz=MzkxNzU2Nzg2NQ==&action=getalbum&album_id=3101281634991243270&scene=173&subscene=&sessionid=svr_e2f2637693e&enterid=1708590301&from_msgid=2247484919&from_itemidx=1&count=3&nolastread=1#wechat_redirect"

    with sync_playwright() as playwright:
        browser = playwright.chromium.launch()
        try:
            context = browser.new_context()
            page = context.new_page()

            # Navigate to the target URL and wait for the page to fully load
            page.goto(url)
            page.wait_for_load_state("load")

            # Auto-scroll to load all articles
            for scroll_iteration in range(_count_scroll_rounds(page)):
                print(f"Scrolling iteration {scroll_iteration + 1}...")
                page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
                page.wait_for_timeout(scroll_pause_ms)

            # Retrieve all article elements
            article_elements = page.query_selector_all(".album__list.js_album_list > .album__list-item")

            # Output article titles and links
            for article in article_elements:
                article_url = article.get_attribute("data-link")
                article_title = article.get_attribute("data-title")
                print(f"Title: {article_title}\nURL: {article_url}\n*****\n")
        finally:
            # Close the browser even if navigation or scraping raised.
            browser.close()
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    scrape_wechat_articles()
Output Example:
Title: Python 爬取各搜索引擎提示词
URL: http://mp.weixin.qq.com/s?__biz=MzkxNzU2Nzg2NQ==&mid=2247484919&idx=1&sn=39086f3f1cde46a6f67b56483d7228a8&chksm=c1bfe5e1f6c86cf7d3d7d29e4a400f7534093f0ac261db5dce2f4ed68e09118e73422446633b#rd
*****
Title: python下雪效果
URL: http://mp.weixin.qq.com/s?__biz=MzkxNzU2Nzg2NQ==&mid=2247484905&idx=1&sn=93d8ca69ddbb04d7035531d097b23522&chksm=c1bfe5fff6c86ce9966df41164d172db7d7c714f5ef60119e209186b5a73756c7a6e3e96cc92#rd
*****
Title: Python制作简单拼图效果
URL: http://mp.weixin.qq.com/s?__biz=MzkxNzU2Nzg2NQ==&mid=2247484901&idx=1&sn=e0c33671ad5ff182290173c437ac7360&chksm=c1bfe5f3f6c86ce532a043efad30384c07a674f6b78af5bb1aa4751176fd74d1148a31fe1fdb#rd
*****
... (additional articles)