Generating Animated Global Subway Mileage Video with Python
Import required libraries and configure plotting settings:
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from moviepy.editor import VideoFileClip, AudioFileClip, afx
# Configure Chinese font rendering in plots.
# Matplotlib resolves text through the default 'sans-serif' family, so the
# CJK-capable font has to be registered in the sans-serif list to take effect.
# The serif list is kept as well for figures that switch to the serif family.
plt.rcParams['font.sans-serif'] = ['YouYuan']
plt.rcParams['font.serif'] = ['YouYuan']
# Draw minus signs with the ASCII hyphen instead of the Unicode minus glyph
plt.rcParams['axes.unicode_minus'] = False
Load and preprocess the raw data: expand the annual data to monthly resolution and fill the gaps:
- Set all values before the first positive entry in each column to 0
- Fill the empty monthly gaps with linear interpolation
# Load input data from Excel; the unnamed first column is renamed to 'year'
raw_data = pd.read_excel('national_subway_mileage.xlsx')
raw_data = raw_data.rename(columns={'Unnamed: 0': 'year'})
if raw_data.empty:
    raise ValueError('national_subway_mileage.xlsx contains no data rows')

# Expand annual rows to monthly resolution in one step: annual row k is placed
# at position 12 * k and the frame is reindexed onto the full range. This
# inserts 11 all-NaN rows between consecutive years and ends on the final
# annual row, without the quadratic cost of concatenating row by row.
MONTHS_PER_YEAR = 12
annual = raw_data.reset_index(drop=True)
annual.index = annual.index * MONTHS_PER_YEAR
processed_data = annual.reindex(range(annual.index[-1] + 1))

# Interpolate missing monthly values for the mileage columns only; values
# before a column's first valid entry stay NaN at this point.
interpolated = processed_data.drop(columns='year').interpolate(method='linear')

# Forward fill year labels so every monthly row carries the year it belongs to
filled_years = processed_data['year'].ffill()

# Filter post-1900 data using the filled labels; this works even when the
# input has no row whose year is exactly 1900.
keep = filled_years >= 1900

# Extract only mileage data for all countries, filling remaining gaps with 0
mileage_data = interpolated.loc[keep].fillna(0).reset_index(drop=True)
year_labels = filled_years.loc[keep].to_frame().reset_index(drop=True)

# Processed table: sparse 'year' column (NaN on inserted rows) plus mileage
processed_data = pd.concat(
    [processed_data.loc[keep, ['year']].reset_index(drop=True), mileage_data],
    axis=1,
)
Generate the individual bar-chart frames. Each frame displays the top 10 countries by mileage (all of them if there are fewer than 10), sorted so that the largest mileage is drawn at the top. Every country is assigned a fixed color, with China set to red, and a formatted value label is added to each bar:
# Assign a fixed color to every country so a bar keeps its color across frames.
# Colors are paired with the mileage columns by position. The last palette
# entry is red, so China is drawn in red when it is the 11th mileage column.
# NOTE(review): confirm the column order of the input spreadsheet.
country_names = mileage_data.columns
color_palette = [
    '#DDDDDD', '#FF44AA', '#FF5511', '#886600', '#FFFF33',
    '#77FF00', '#77FFEE', '#003C9D', '#D28EFF', '#770077', '#FF0000',
]
# Wrap around the palette so a dataset with more countries than colors still
# maps every column, instead of raising KeyError when a frame is rendered.
country_color_map = {
    country: color_palette[position % len(color_palette)]
    for position, country in enumerate(country_names)
}
# Set plot background style
plt.rcParams['axes.facecolor'] = '#FFE4E1'
# savefig does not create missing directories, so make sure the target exists
os.makedirs('frames', exist_ok=True)
# Generate one frame per time step
for step in range(len(mileage_data)):
    # Initialize figure and axes; 15x8 inches at 80 dpi gives 1200x640 pixels
    fig, ax = plt.subplots(figsize=(15, 8), dpi=80, facecolor='#FFE4E1')
    ax.set_xlabel("Total Mileage (KM)", fontsize=25)
    ax.set_ylabel("Country", fontsize=25)
    ax.tick_params(axis='both', labelsize=25)
    ax.set_title("Global National Subway Mileage", fontsize=30)
    # Hide unnecessary top and right spines
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)
    # Sort countries by current mileage and keep the 10 largest (or all, if
    # fewer). Ascending order puts the largest value at the highest y position.
    sorted_mileage = mileage_data.iloc[step].sort_values(ascending=True)
    top_countries = sorted_mileage.tail(10)
    y_positions = np.arange(len(top_countries))
    # Draw horizontal bars, labelling each with its country name so bars are
    # not identified by color alone.
    # NOTE(review): long names may need a wider left margin at this font size.
    ax.barh(
        y_positions,
        top_countries.values,
        color=[country_color_map[country] for country in top_countries.index],
        tick_label=list(top_countries.index),
    )
    # Add current year overlay and watermark
    current_year = int(year_labels.iloc[step, 0])
    ax.text(0.8, 0.1, str(current_year), transform=ax.transAxes, fontsize=50, alpha=0.7)
    ax.text(0.5, 0.5, 'Data Visualization', transform=ax.transAxes, fontsize=50, alpha=0.4)
    # Add formatted value labels just past the end of each bar
    for y_pos, mileage in zip(y_positions, top_countries.values):
        ax.text(mileage + 2, y_pos, f'{mileage:.2f} KM', va='center', fontsize=20)
    # Save frame to output directory, then close this figure to free its memory
    fig.savefig(f'frames/frame_{step}.jpg')
    plt.close(fig)
Compile all generated frames into an MP4 video at 12 frames per second:
# Helper function to read images with non-ASCII file paths
def read_image(file_path):
    """Decode an image file into an OpenCV array.

    The file is read into a byte buffer with NumPy and decoded from memory,
    so the path itself is never passed to OpenCV.

    Args:
        file_path: Path of the image file to load.

    Returns:
        The decoded image array with its channels left unchanged, or None
        when the file is missing, unreadable, empty, or cannot be decoded.
    """
    try:
        raw_bytes = np.fromfile(file_path, dtype=np.uint8)
    except OSError:
        # Missing or unreadable file: report "no image" so callers can skip it
        return None
    if raw_bytes.size == 0:
        # imdecode rejects an empty buffer with an error instead of None
        return None
    return cv2.imdecode(raw_bytes, cv2.IMREAD_UNCHANGED)
# Video output configuration
output_resolution = (1200, 640)
fps = 12
video_writer = cv2.VideoWriter(
    'output_no_audio.mp4',
    cv2.VideoWriter_fourcc(*'mp4v'),
    fps,
    output_resolution,
)
if not video_writer.isOpened():
    # Fail early rather than silently producing an empty or missing video file
    video_writer.release()
    raise RuntimeError("Could not open 'output_no_audio.mp4' for writing")
# Stream frames into the video one at a time. Only a single frame is held in
# memory, rather than buffering every resized frame before writing.
total_steps = len(mileage_data)
try:
    for step in range(total_steps):
        img = read_image(f'frames/frame_{step}.jpg')
        if img is None:
            print(f"Frame {step} not found, skipping.")
            continue
        video_writer.write(cv2.resize(img, output_resolution))
finally:
    # Always finalize the container, even if a frame fails mid-way
    video_writer.release()
Add looping background audio to the final video:
# Add BGM to the compiled video. The clips are closed explicitly once the
# final file is written, including when an earlier step raises.
raw_video = VideoFileClip('output_no_audio.mp4')
try:
    background_audio = AudioFileClip('bgm.mp3')
    try:
        # Repeat the track so that it covers the whole video duration
        looped_audio = afx.audio_loop(background_audio, duration=raw_video.duration)
        final_video = raw_video.set_audio(looped_audio)
        final_video.write_videofile('final_subway_mileage_animation.mp4')
    finally:
        background_audio.close()
finally:
    raw_video.close()