Building a Chinese Poetry Chain Game with Python
The concept of a poetry chain game, often seen in cultural competitions, is inspired by the classical literary drinking game 'Fei Hua Ling'. This challenge requires participants to sequentially recite lines of poetry where the last character of one line phonetically matches the first character of the next. While challenging for humans, this task is well-suited for computational automation.
This article details the implementation of a Chinese poetry chain game using Python. The core process involves collecting a corpus of classical poems, processing them to extract phonetic information, and creating a system that can automatically generate or respond to poetic chains.
1. Data Collection via Web Scraping
The first step is to gather a substantial collection of classical Chinese poems. Websites like Gushiwen.org provide categorized archives suitable for scraping. The following script uses requests and BeautifulSoup to fetch and parse poem content from multiple listing pages.
import re
from concurrent.futures import ThreadPoolExecutor, wait, ALL_COMPLETED
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# Listing pages whose first <div class="sons"> holds the links to the
# individual poem detail pages.
list_urls = [
    'https://so.gushiwen.org/gushi/tangshi.aspx',
    'https://so.gushiwen.org/gushi/sanbai.aspx',
    'https://so.gushiwen.org/gushi/songsan.aspx',
    'https://so.gushiwen.org/gushi/songci.aspx',
    'https://so.gushiwen.org/gushi/shijiu.aspx',
]

# Detail-page URLs gathered from every listing page, in discovery order.
poem_detail_links = []

# Browser-like User-Agent sent with every request made by this script.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}

for list_url in list_urls:
    try:
        # Same 10-second timeout as the detail-page fetches, so a stalled
        # connection cannot hang the script forever.
        resp = requests.get(list_url, headers=headers, timeout=10)
        resp.raise_for_status()
    except requests.RequestException as e:
        # One unreachable listing page should not abort the whole crawl.
        print(f'Error fetching {list_url}: {e}')
        continue

    soup = BeautifulSoup(resp.text, 'lxml')
    # Locate the main content div; skip the page if the layout is unexpected.
    main_div = soup.find('div', class_='sons')
    if main_div is None:
        continue

    for link_tag in main_div.find_all('a'):
        href = link_tag.get('href')
        if href:
            # urljoin resolves site-relative hrefs against the listing page
            # and leaves already-absolute hrefs untouched.
            poem_detail_links.append(urljoin(list_url, href))

print(f'Found {len(poem_detail_links)} poem links.')
def clean_text(raw_line):
    """Return *raw_line* reduced to Chinese characters, ASCII letters and digits.

    Every character outside the ranges U+4E00-U+9FA5, ``a-z``, ``A-Z`` and
    ``0-9`` is dropped, which removes whitespace and all punctuation.
    """
    return re.sub('[^\u4e00-\u9fa5a-zA-Z0-9]', '', raw_line)
# Parenthesised editorial annotations, written with half-width "(" ")" or
# full-width U+FF08 / U+FF09 parentheses (mixed pairs are accepted as well).
_ANNOTATION_RE = re.compile(r'[(\uff08][^)\uff09]*[)\uff09]')

# Full-width "!" (U+FF01) and "?" (U+FF1F) mapped to their ASCII forms so
# later stages only have to deal with one spelling of each terminator.
_PUNCTUATION_TABLE = str.maketrans({'\uff01': '!', '\uff1f': '?'})


def _normalize_poem_text(raw_text):
    """Strip whitespace and annotations from poem text and unify ! and ?."""
    # Remove all whitespace (including ideographic spaces and newlines) so
    # each poem stays on a single line when written to a line-based corpus.
    text = re.sub(r'\s+', '', raw_text)
    text = _ANNOTATION_RE.sub('', text)
    return text.translate(_PUNCTUATION_TABLE)


def fetch_poem_text(detail_url):
    """Fetch a poem detail page and return its cleaned text.

    Args:
        detail_url: Absolute URL of the poem's detail page.

    Returns:
        The text of the first ``<div class="contson">`` with whitespace and
        parenthesised annotations removed and full-width ``!``/``?``
        converted to ASCII, or ``None`` when the request fails, the server
        answers with an HTTP error status, or the page has no such div.
    """
    print(f'Fetching: {detail_url}')
    try:
        resp = requests.get(detail_url, headers=headers, timeout=10)
        # Treat 4xx/5xx answers as failures instead of parsing an error page.
        resp.raise_for_status()
    except requests.RequestException as e:
        print(f'Error fetching {detail_url}: {e}')
        return None

    soup = BeautifulSoup(resp.text, 'lxml')
    poem_div = soup.find('div', class_='contson')
    if poem_div is None:
        return None
    return _normalize_poem_text(poem_div.get_text(strip=True))
# Fetch all detail pages concurrently: the work is network-bound, so threads
# overlap the waiting time. The context manager shuts the pool down and joins
# its worker threads even if something raises.
with ThreadPoolExecutor(max_workers=8) as executor:
    fetched_texts = list(executor.map(fetch_poem_text, poem_detail_links))

# Characters removed from every poem before it is saved: book-title marks,
# colons and double quotes. The full-width colon (U+FF1A) and curly quotes
# (U+201C/U+201D) are included alongside the ASCII forms because the scraped
# pages presumably use CJK punctuation.
_removal_table = str.maketrans('', '', '《》:"\uff1a\u201c\u201d')

collected_poems = []
for fetched in fetched_texts:
    if not fetched:
        continue  # failed download or page without poem text
    cleaned_poem = fetched.translate(_removal_table)
    if cleaned_poem:
        collected_poems.append(cleaned_poem)

# Deduplicate after cleaning so poems that differ only in stripped characters
# collapse into one entry; sort by length, then text, so the output file is
# identical from run to run.
unique_poems = sorted(set(collected_poems), key=lambda poem: (len(poem), poem))

with open('poetry_corpus.txt', 'w', encoding='utf-8') as out_file:
    out_file.writelines(poem + '\n' for poem in unique_poems)

print(f'Successfully saved {len(unique_poems)} poems.')
2. Processing the Poetry Corpus
The next step is to index the poems by the phonetic sound (Pinyin) of the first character of each line. This allows for quick lookup during the chain game. We use the xpinyin library for conversion and pickle for serialization.
import re
import pickle
from xpinyin import Pinyin
from collections import defaultdict
def create_phonetic_index(input_file='poetry_corpus.txt', output_file='poetry_index.pk'):
    """Build and pickle an index of verses keyed by first-character Pinyin.

    Each line of *input_file* is treated as one poem. The poem is split into
    verses at sentence terminators, verses shorter than four characters are
    discarded, duplicates are dropped, and every remaining verse is filed
    under the tone-marked Pinyin of its first character.

    Args:
        input_file: Path of the UTF-8 corpus, one poem per line.
        output_file: Path the pickled ``dict[str, list[str]]`` is written to.
    """
    # Sentence terminators: the ideographic full stop plus "?" and "!" in
    # both half-width and full-width (U+FF1F, U+FF01) spellings, so the
    # split works whichever form the corpus contains.
    terminator_re = re.compile('[。?!\uff1f\uff01]')
    pinyin_converter = Pinyin()

    verse_index = defaultdict(list)
    seen_verses = set()
    with open(input_file, 'r', encoding='utf-8') as f:
        for line in f:
            for verse in terminator_re.split(line.strip()):
                # Skip very short fragments and verses already indexed.
                if len(verse) < 4 or verse in seen_verses:
                    continue
                seen_verses.add(verse)
                # Key: Pinyin of the first character, with tone marks.
                first_char_pinyin = pinyin_converter.get_pinyin(verse[0], tone_marks='marks')
                # Appending in file order keeps the index reproducible;
                # iterating a set of strings would reorder it on every run.
                verse_index[first_char_pinyin].append(verse)

    # Store a plain dict so missing keys raise KeyError after loading.
    with open(output_file, 'wb') as f:
        pickle.dump(dict(verse_index), f)
    print(f'Index created with {len(verse_index)} phonetic keys.')


if __name__ == '__main__':
    create_phonetic_index()
To load and inspect the created index:
import pickle
# Read the pickled index back into memory.
# NOTE: unpickling can execute arbitrary code; only load index files that
# you generated yourself.
with open('poetry_index.pk', 'rb') as index_file:
    loaded_index = pickle.load(index_file)

# Show the first five phonetic keys with up to two sample verses each.
preview_entries = list(loaded_index.items())[:5]
for phonetic_key, verse_list in preview_entries:
    sample_verses = verse_list[:2]
    print(f'{phonetic_key}: {sample_verses}')
3. Implementing the Chain Game Logic
The final component is the interactive game itself. It supports two modes: human vs. computer and fully automated computer chain generation. It uses phonetic matching to find the next verse.
import pickle
import random
from xpinyin import Pinyin
# Converter shared by the Pinyin helper functions in this script.
pinyin_tool = Pinyin()

# Phonetic index: tone-marked Pinyin of a verse's first character -> verses.
# NOTE: unpickling can execute arbitrary code; only load an index file that
# you built yourself.
with open('poetry_index.pk', 'rb') as index_file:
    POETRY_INDEX = pickle.load(index_file)
def get_last_character_pinyin(input_string):
    """Return the tone-marked Pinyin of the last meaningful character.

    Trailing punctuation and symbols (for example a closing full stop or
    quotation mark) are skipped so that a verse typed with its final
    punctuation still chains on its last real character. If the string has
    no letter, digit or ideograph at all, its last character is used as-is.

    Args:
        input_string: The verse or single character to inspect; may be
            ``None`` or empty.

    Returns:
        The Pinyin string produced by ``pinyin_tool``, or ``None`` when the
        input is ``None``, empty or whitespace only.
    """
    text = input_string.strip() if input_string else ''
    if not text:
        return None
    # str.isalnum() is true for CJK ideographs, letters and digits and false
    # for punctuation, so this walks back over any trailing punctuation.
    last_char = next((ch for ch in reversed(text) if ch.isalnum()), text[-1])
    return pinyin_tool.get_pinyin(last_char, tone_marks='marks')
def get_first_character_pinyin(input_string):
    """Return the tone-marked Pinyin of the first meaningful character.

    Leading punctuation and symbols (for example an opening quotation or
    book-title mark) are skipped so they do not hide the verse's first real
    character. If the string has no letter, digit or ideograph at all, its
    first character is used as-is.

    Args:
        input_string: The verse or single character to inspect; may be
            ``None`` or empty.

    Returns:
        The Pinyin string produced by ``pinyin_tool``, or ``None`` when the
        input is ``None``, empty or whitespace only.
    """
    text = input_string.strip() if input_string else ''
    if not text:
        return None
    # str.isalnum() is true for CJK ideographs, letters and digits and false
    # for punctuation, so this steps over any leading punctuation.
    first_char = next((ch for ch in text if ch.isalnum()), text[0])
    return pinyin_tool.get_pinyin(first_char, tone_marks='marks')
def play_human_vs_computer():
    """Run the interactive mode in which the computer and the user alternate.

    The user supplies an opening verse or character. Each round the computer
    answers with a random indexed verse whose first-character Pinyin equals
    the Pinyin of the current verse's last character, and the user must reply
    with a verse whose first character has the same Pinyin as the last
    character of the computer's verse. The game ends when the index has no
    match, the user types "exit", or the user's reply does not match.
    """
    print('\n--- Human vs. Computer Mode ---')
    current_verse = input('Enter a verse or a single character to start: ').strip()

    while True:
        # Computer's turn: look up verses that begin with the needed sound.
        last_pinyin = get_last_character_pinyin(current_verse)
        candidates = POETRY_INDEX.get(last_pinyin)
        if candidates is None:
            print(f'No verse found starting with sound "{last_pinyin}". Game over.')
            return

        computer_choice = random.choice(candidates)
        print(f'Computer: {computer_choice}')

        # Human's turn: the reply has to start with the sound that ends the
        # computer's verse.
        required_start_pinyin = get_last_character_pinyin(computer_choice)
        print(f'Your verse must start with sound: {required_start_pinyin}')
        reply = input('Your verse (or type "exit" to quit): ').strip()
        if reply.lower() == 'exit':
            return

        if get_first_character_pinyin(reply) != required_start_pinyin:
            print('Phonetic mismatch! Game over.')
            return
        current_verse = reply
def play_auto_chain(max_length=10):
    """Let the computer extend a user-supplied seed into a verse chain.

    Args:
        max_length: Maximum number of verses to append after the seed.

    Starting from the seed, each step picks a random indexed verse whose
    first-character Pinyin equals the Pinyin of the previous verse's last
    character. Generation stops early when no such verse exists. The full
    chain, seed included, is printed at the end.
    """
    print('\n--- Automated Chain Generation ---')
    seed_input = input('Enter a starting verse or character: ').strip()
    chain = [seed_input]

    for step in range(1, max_length + 1):
        # The most recently added verse decides which sound comes next.
        target_pinyin = get_last_character_pinyin(chain[-1])
        matches = POETRY_INDEX.get(target_pinyin)
        if matches is None:
            print(f'Chain broken at step {step}. No match for "{target_pinyin}".')
            break
        picked = random.choice(matches)
        chain.append(picked)
        print(f'Step {step}: {picked}')

    print(f'\nGenerated chain ({len(chain)} verses):')
    for position, verse in enumerate(chain, start=1):
        print(f' {position}. {verse}')
# Entry point: show the menu repeatedly until the user chooses to exit.
if __name__ == '__main__':
    # Menu choice -> game mode to run.
    game_modes = {
        '1': play_human_vs_computer,
        '2': play_auto_chain,
    }
    while True:
        print('\n===== Poetry Chain Game =====')
        print('1. Human vs. Computer (Interactive)')
        print('2. Automated Chain Generation')
        print('3. Exit')
        choice = input('Select mode (1-3): ').strip()

        if choice == '3':
            print('Exiting. Goodbye!')
            break

        selected_mode = game_modes.get(choice)
        if selected_mode is None:
            print('Invalid selection. Please try again.')
            continue
        selected_mode()