OpenCV Image Processing: I/O Operations, Spatial Manipulation, and Channel Management
OpenCV stores digital images as multi-dimensional NumPy arrays, with pixel intensities typically ranging from 0 to 255. Color images follow the BGR channel ordering by default, comprising three matrices representing blue, green, and red intensities.
Media Input Operations
Reading static images utilizes cv2.imread(), which decodes files into array representations. For dynamic video sources, the VideoCapture class manages both prerecorded files and live camera feeds:
import cv2

# Open the default camera (device index 0). For network streams (RTSP/HTTP),
# pass the URL string instead of the device index.
cap = cv2.VideoCapture(0)  # Device index 0 for default camera
if not cap.isOpened():
    raise IOError("Video source inaccessible")

try:
    while True:
        # read() returns (frame_available, frame_data)
        ret, frame = cap.read()
        if not ret:
            break
        # Processing logic here
        cv2.imshow('Preview', frame)
        # Mask to the low byte so the key-code comparison is portable
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the device and close windows even if processing raises,
    # otherwise the camera stays locked until the process exits.
    cap.release()
    cv2.destroyAllWindows()
The read() method returns a boolean indicating frame availability and the frame data itself. When capturing from network streams (RTSP/HTTP), the same interface applies—simply pass the URL string instead of a device index.
Spatial Region Extraction
Isolating specific portions of an image leverages NumPy slicing operations on the array. The following implementation demonstrates center-cropping with configurable dimensions:
from pathlib import Path
import cv2
def extract_center_regions(source_dir: str, output_dir: str, crop_size: int = 128):
    """Center-crop every .jpg in *source_dir* and write results to *output_dir*.

    The crop window is roughly crop_size x crop_size, clamped to the image
    boundaries, so images smaller than crop_size produce a smaller ROI
    rather than raising. Output files keep their original names.
    """
    src_path = Path(source_dir)
    dst_path = Path(output_dir)
    # parents=True so a nested output path works; consistent with the
    # batch helpers elsewhere in this file.
    dst_path.mkdir(parents=True, exist_ok=True)
    for img_file in src_path.glob('*.jpg'):
        img = cv2.imread(str(img_file))
        if img is None:
            # Unreadable/corrupt file — skip it rather than abort the run
            continue
        height, width = img.shape[:2]
        cy, cx = height // 2, width // 2
        half = crop_size // 2
        # Define ROI boundaries, clamped so the slice never leaves the image
        y_start, y_end = max(0, cy - half), min(height, cy + half)
        x_start, x_end = max(0, cx - half), min(width, cx + half)
        roi = img[y_start:y_end, x_start:x_end]
        cv2.imwrite(str(dst_path / img_file.name), roi)

extract_center_regions('input_images', 'cropped_output')
For processing extensive collections, sequential iteration becomes inefficient. This batch processing approach incorporates dimension-based conditional logic:
import cv2
from pathlib import Path
import time
def batch_crop_documents(input_folder, output_folder):
    """Crop a fixed document region from every .png/.jpg under *input_folder*.

    The crop window is selected by aspect ratio (h/w): flatter layouts get
    one fixed region, taller ("rolled") layouts another. Images too small
    for their crop window are skipped instead of crashing the batch.
    Prints per-file progress as a percentage of the total.
    """
    in_path = Path(input_folder)
    out_path = Path(output_folder)
    out_path.mkdir(parents=True, exist_ok=True)
    image_paths = list(in_path.rglob('*.png')) + list(in_path.rglob('*.jpg'))
    total = len(image_paths)
    for idx, img_path in enumerate(image_paths):
        img = cv2.imread(str(img_path))
        if img is None:
            continue
        h, w = img.shape[:2]
        aspect = h / w
        # Conditional cropping based on document geometry
        if aspect < 1.3:
            # Standard document layout
            cropped = img[50:200, 700:1500]
        else:
            # Vertical/rolled document format
            cropped = img[30:150, 50:600]
        # Slices beyond the image bounds yield a zero-sized array, and
        # cv2.imwrite raises on empty input — skip such images.
        if cropped.size == 0:
            continue
        out_file = out_path / img_path.name
        cv2.imwrite(str(out_file), cropped)
        # Progress indication
        progress = (idx + 1) / total * 100
        print(f'Processing {img_path.name}: {progress:.1f}%')

batch_crop_documents('raw_docs', 'processed_docs')
To maximize throughput on multi-core systems, distribute the workload across processes:
import cv2
from pathlib import Path
from concurrent.futures import ProcessPoolExecutor
import multiprocessing
def process_single_image(args):
    """Crop one image and write it to the output directory.

    *args* is an (img_path, output_dir) tuple so the function can be fed
    directly to executor.map(). Returns None on success, or a descriptive
    error string on failure — a worker must never raise, or the pool's
    result collection would abort.
    """
    img_path, output_dir = args
    try:
        img = cv2.imread(str(img_path))
        if img is None:
            return f"Failed to load: {img_path}"
        h, w = img.shape[:2]
        # Adaptive cropping logic: wide scans use a different fixed window
        if w > 1600:
            region = img[50:200, 700:1500]
        else:
            region = img[30:150, 50:600]
        # Slices past the image bounds produce a zero-sized array, which
        # cv2.imwrite rejects — report it as a failure instead of raising.
        if region.size == 0:
            return f"Empty crop region: {img_path}"
        out_path = Path(output_dir) / img_path.name
        cv2.imwrite(str(out_path), region)
        return None
    except Exception as e:
        return f"Error processing {img_path}: {e}"
def parallel_crop(source, destination, max_workers=None):
    """Crop all top-level .jpg/.png files in *source* across processes.

    max_workers defaults to the machine's CPU count. When done, prints a
    summary plus the first five failure messages (if any).
    """
    if max_workers is None:
        max_workers = multiprocessing.cpu_count()
    src = Path(source)
    dst = Path(destination)
    # parents=True keeps this consistent with batch_crop_documents and
    # lets nested destination paths work.
    dst.mkdir(parents=True, exist_ok=True)
    files = list(src.iterdir())
    # Case-insensitive suffix filter; only immediate children are scanned
    tasks = [(f, dst) for f in files if f.suffix.lower() in ('.jpg', '.png')]
    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        results = list(executor.map(process_single_image, tasks))
    # Workers signal success with None and failure with a message string
    failures = [r for r in results if r is not None]
    print(f"Completed. Failures: {len(failures)}")
    for fail in failures[:5]:
        print(fail)

parallel_crop('dataset/raw', 'dataset/cropped')
Channel Decomposition and Reconstruction
Color images consist of distinct channel layers that can be separated for independent analysis:
import cv2

bgr_img = cv2.imread('color_photo.jpg')
if bgr_img is not None:
    # split() yields one single-channel array per plane, in B, G, R order
    blue_plane, green_plane, red_plane = cv2.split(bgr_img)
    # Each plane renders as a grayscale image in its own window
    for title, plane in (('Blue Component', blue_plane),
                         ('Green Component', green_plane),
                         ('Red Component', red_plane)):
        cv2.imshow(title, plane)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
Reconstructing channels requires maintaining proper ordering. The merge operation combines single-channel arrays into multi-channel representations:
# Rebuild the image in OpenCV's native BGR channel order
reconstructed = cv2.merge([blue_plane, green_plane, red_plane])

# Swapping the outer planes produces an RGB-ordered array instead
rgb_version = cv2.merge([red_plane, green_plane, blue_plane])
All input arrays must share identical dimensions and data types. Mismatched geometries trigger runtime errors during the merge operation.
Boundary Extension Strategies
The copyMakeBorder function expands image dimensions by padding edges using various extrapolation methods:
import cv2

src = cv2.imread('sample.jpg')
pad = 50  # border width, in pixels, applied to every side

def _padded(border_type, **kwargs):
    # Apply uniform padding on all four sides with the given extrapolation mode
    return cv2.copyMakeBorder(src, pad, pad, pad, pad, border_type, **kwargs)

# Constant value padding (black border)
constant_pad = _padded(cv2.BORDER_CONSTANT, value=0)

# Replicate edge pixels
replicate_pad = _padded(cv2.BORDER_REPLICATE)

# Reflect with edge pixel duplication
reflect_pad = _padded(cv2.BORDER_REFLECT)

# Reflect without edge duplication (mirror)
reflect101_pad = _padded(cv2.BORDER_REFLECT_101)

# Wrap around (cyclic)
wrap_pad = _padded(cv2.BORDER_WRAP)
For region filling applications, the flood fill algorithm replaces connected pixel regions:
import cv2
import numpy as np

canvas = cv2.imread('binary_mask.png')
# floodFill requires an operation mask two pixels larger than the image
# in each dimension
h, w = canvas.shape[:2]
mask = np.zeros((h + 2, w + 2), dtype=np.uint8)

# Repaint the connected region around the seed point; loDiff/upDiff set
# the per-channel tolerance for which neighbors count as connected.
cv2.floodFill(
    canvas,
    mask,
    seedPoint=(100, 100),
    newVal=(255, 255, 0),
    loDiff=(10, 10, 10),
    upDiff=(10, 10, 10),
)
Pixel Arithmetic and Image Fusion
OpenCV's addition operation saturates at 255 (maximum unsigned 8-bit value) rather than wrapping around:
import cv2
import numpy as np

# cv2.add performs saturating arithmetic: results clip at the uint8
# ceiling, whereas plain NumPy addition would wrap around modulo 256.
mat_a = np.uint8([250])
mat_b = np.uint8([20])
result = cv2.add(mat_a, mat_b)  # Outputs 255, not 14
Combining images of different sizes requires geometric normalization. The resize function adjusts dimensions using various interpolation algorithms:
foreground = cv2.imread('object.png')
background = cv2.imread('scene.jpg')

# Resize the foreground to the background's geometry. Note the order flip:
# shape gives (height, width) but resize() expects (width, height).
target_h, target_w = background.shape[:2]
scaled_fg = cv2.resize(foreground, (target_w, target_h), interpolation=cv2.INTER_LINEAR)

# Weighted sum: 0.7 * background + 0.3 * foreground + gamma
blended = cv2.addWeighted(
    background, 0.7,  # Background weight
    scaled_fg, 0.3,   # Foreground weight
    gamma=10          # Brightness offset
)
For non-uniform scaling, specify relative factors while setting output dimensions to zero:
# Stretch horizontally by 2x, compress vertically by 0.5x. A dsize of
# (0, 0) tells resize() to derive the output size from fx and fy.
anisotropic = cv2.resize(
    src_img,
    (0, 0),
    fx=2.0,
    fy=0.5,
    interpolation=cv2.INTER_CUBIC,
)