Stereo Camera Calibration and Image Acquisition for Computer Vision
Implementing a stereo vision system for distance estimation involves a multi-stage pipeline. The process typically begins with camera calibration to resolve lens distortion and determine spatial relationships between the dual sensors. Following calibration, Semi-Global Block Matching (SGBM) is used to generate disparity maps, which, when combined with object detection frameworks like YOLOv8, allow for the calculation of an object's real-world distance based on its center coordinates.
Collecting Stereo Image Pairs
To perform calibration, you must capture a series of synchronized images from both sensors. The following script captures a high-resolution wide-angle stream (2560x720) and splits it into discrete left and right frames. Pressing the 's' key saves the current pair to local storage, and 'q' exits the capture loop.
import cv2
import os
def capture_calibration_frames(device_index=0, output_dir="stereo_data"):
    """Capture synchronized left/right frame pairs from a side-by-side stereo camera.

    Opens the capture device, requests a 2560x720 side-by-side stream, and
    previews both half-frames. Pressing 's' saves the current pair to
    ``<output_dir>/left`` and ``<output_dir>/right``; pressing 'q' exits.

    Args:
        device_index: OpenCV capture device index (default 0).
        output_dir: Root directory for the saved calibration images.

    Raises:
        RuntimeError: If the capture device cannot be opened.
    """
    stream = cv2.VideoCapture(device_index)
    if not stream.isOpened():
        raise RuntimeError(f"Cannot open capture device {device_index}")
    # Configure for side-by-side stereo resolution (two 1280x720 halves).
    stream.set(cv2.CAP_PROP_FRAME_WIDTH, 2560)
    stream.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
    # Ensure output directories exist before the first save.
    os.makedirs(os.path.join(output_dir, "left"), exist_ok=True)
    os.makedirs(os.path.join(output_dir, "right"), exist_ok=True)
    image_count = 0
    try:
        while True:
            success, raw_img = stream.read()
            if not success:
                break
            # Split on the actual delivered frame width rather than a
            # hard-coded 1280, so the script still works if the driver
            # ignores the requested resolution.
            half_width = raw_img.shape[1] // 2
            view_l = raw_img[:, :half_width]
            view_r = raw_img[:, half_width:]
            cv2.imshow("Left Channel", view_l)
            cv2.imshow("Right Channel", view_r)
            input_key = cv2.waitKey(1) & 0xFF
            if input_key == ord('s'):
                # Save the current synchronized pair.
                cv2.imwrite(f"{output_dir}/left/frame_{image_count}.png", view_l)
                cv2.imwrite(f"{output_dir}/right/frame_{image_count}.png", view_r)
                print(f"Saved pair index: {image_count}")
                image_count += 1
            elif input_key == ord('q'):
                break
    finally:
        # Release the camera and close windows even if the loop errors out.
        stream.release()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    capture_calibration_frames()
Applying Calibration and Image Rectification
Once intrinsic parameters and distortion coefficients are obtained (via MATLAB Calibration Toolbox or OpenCV's calibration methods), they can be applied to live feeds to correct lens warping. This step is critical for accurate stereo matching.
import cv2
import numpy as np
# Intrinsic matrices for left and right sensors
# Intrinsic matrices for the left and right sensors, obtained from a prior
# calibration run (fx/fy focal lengths and cx/cy principal point, in pixels).
MTX_L = np.array([
    [724.298, 0, 636.649],
    [0, 725.259, 300.379],
    [0, 0, 1],
])
MTX_R = np.array([
    [729.938, 0, 574.732],
    [0, 731.188, 290.944],
    [0, 0, 1],
])
# New camera matrices used as the target projection for cv2.undistort
# (principal point recentred to the 1280x720 image center).
OPTIMIZED_L = np.array([[724.298, 0, 640], [0, 725.259, 360], [0, 0, 1]])
OPTIMIZED_R = np.array([[729.938, 0, 640], [0, 731.188, 360], [0, 0, 1]])
# Distortion coefficients [k1, k2, p1, p2, k3].
DIST_L = np.array([0.0883, -0.1039, 0.0403, 0.0003, 0])
DIST_R = np.array([0.1034, -0.1811, -7.93e-05, 0.0003, 0])


def run_rectified_preview(device_index=0):
    """Stream the stereo camera and display undistorted left/right channels.

    Applies the calibration constants above to each half of the side-by-side
    feed. Press Esc to exit.

    Args:
        device_index: OpenCV capture device index (default 0).

    Raises:
        RuntimeError: If the capture device cannot be opened.
    """
    video_source = cv2.VideoCapture(device_index)
    if not video_source.isOpened():
        raise RuntimeError(f"Cannot open capture device {device_index}")
    video_source.set(cv2.CAP_PROP_FRAME_WIDTH, 2560)
    video_source.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
    try:
        while True:
            grabbed, frame = video_source.read()
            if not grabbed:
                break
            # Split on the delivered width instead of hard-coding 1280,
            # in case the driver ignores the requested resolution.
            half_width = frame.shape[1] // 2
            raw_left = frame[:, :half_width]
            raw_right = frame[:, half_width:]
            # Undistort each channel toward its optimized camera matrix.
            rectified_left = cv2.undistort(raw_left, MTX_L, DIST_L, None, OPTIMIZED_L)
            rectified_right = cv2.undistort(raw_right, MTX_R, DIST_R, None, OPTIMIZED_R)
            cv2.imshow("Rectified Left", rectified_left)
            cv2.imshow("Rectified Right", rectified_right)
            if cv2.waitKey(1) & 0xFF == 27:  # Esc
                break
    finally:
        # Release the camera and close windows even on error.
        video_source.release()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    run_rectified_preview()