Fading Coder

One Final Commit for the Last Sprint

Home > Tools > Content

OpenCV Image Processing: I/O Operations, Spatial Manipulation, and Channel Management

Tools 1

OpenCV stores digital images as multi-dimensional NumPy arrays, with pixel intensities typically ranging from 0 to 255. Color images follow the BGR channel ordering by default, comprising three matrices representing blue, green, and red intensities.

Media Input Operations

Reading static images utilizes cv2.imread(), which decodes files into array representations. For dynamic video sources, the VideoCapture class manages both prerecorded files and live camera feeds:

import cv2

# Open the default camera (device index 0).
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    raise IOError("Video source inaccessible")

try:
    while True:
        # read() returns a success flag together with the decoded frame
        ret, frame = cap.read()
        if not ret:
            break
        # Processing logic here
        cv2.imshow('Preview', frame)
        # Poll the keyboard for about 1 ms; pressing 'q' ends the preview
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Free the capture device and close the preview window even when the
    # loop exits through an exception or Ctrl+C
    cap.release()
    cv2.destroyAllWindows()

The read() method returns a boolean indicating frame availability and the frame data itself. When capturing from network streams (RTSP/HTTP), the same interface applies—simply pass the URL string instead of a device index.

Spatial Region Extraction

Isolating specific portions of an image leverages NumPy slicing operations on the array. The following implementation demonstrates center-cropping with configurable dimensions:

from pathlib import Path
import cv2

def extract_center_regions(source_dir: str, output_dir: str, crop_size: int = 128):
    """Center-crop every ``*.jpg`` directly inside *source_dir*.

    Each readable image is cropped to a ``crop_size`` x ``crop_size`` window
    around its center and written to *output_dir* under the same file name.
    Images smaller than the window are clamped to their own bounds, so the
    result may be smaller than ``crop_size``. Files that OpenCV cannot decode
    are skipped.

    Args:
        source_dir: Folder scanned (non-recursively) for ``*.jpg`` files.
        output_dir: Destination folder; created with any missing parents.
        crop_size: Side length of the square crop window in pixels.

    Raises:
        ValueError: If ``crop_size`` is not positive.
    """
    if crop_size <= 0:
        raise ValueError(f"crop_size must be positive, got {crop_size}")

    src_path = Path(source_dir)
    dst_path = Path(output_dir)
    # parents=True lets nested destinations such as 'out/cropped' be created
    dst_path.mkdir(parents=True, exist_ok=True)

    half = crop_size // 2

    for img_file in src_path.glob('*.jpg'):
        img = cv2.imread(str(img_file))
        if img is None:
            continue

        height, width = img.shape[:2]

        # Anchor the window at (center - half) and extend it by the full
        # crop_size, so odd sizes are honored instead of losing one pixel to
        # the 2 * (crop_size // 2) rounding.
        y_start = max(0, height // 2 - half)
        x_start = max(0, width // 2 - half)
        y_end = min(height, y_start + crop_size)
        x_end = min(width, x_start + crop_size)

        roi = img[y_start:y_end, x_start:x_end]
        cv2.imwrite(str(dst_path / img_file.name), roi)

# Crop every JPEG in 'input_images' with the default 128-pixel window
extract_center_regions(source_dir='input_images', output_dir='cropped_output')

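For processing extensive collections, sequential iteration becomes inefficient; the multi-process variant later in this article addresses that. As a first step, this batch routine (still sequential) adds progress reporting and dimension-based conditional logic: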

import cv2
from pathlib import Path
import time

def batch_crop_documents(input_folder, output_folder):
    """Crop a fixed window out of every PNG/JPG found under *input_folder*.

    The folder is searched recursively. The crop window is chosen from the
    image's height/width ratio, and each result is written to *output_folder*
    under the source file's name. Progress is printed per file. Files that
    cannot be decoded, cropped, or written are reported and skipped instead
    of aborting the whole batch.

    Args:
        input_folder: Root folder searched recursively for ``*.png``/``*.jpg``.
        output_folder: Destination folder; created with any missing parents.
    """
    in_path = Path(input_folder)
    out_path = Path(output_folder)
    out_path.mkdir(parents=True, exist_ok=True)

    image_paths = list(in_path.rglob('*.png')) + list(in_path.rglob('*.jpg'))
    total = len(image_paths)

    for idx, img_path in enumerate(image_paths, start=1):
        progress = idx / total * 100

        img = cv2.imread(str(img_path))
        if img is None:
            print(f'Skipping {img_path.name}: unreadable image ({progress:.1f}%)')
            continue

        h, w = img.shape[:2]
        aspect = h / w

        # Conditional cropping based on document geometry
        if aspect < 1.3:
            # Standard document layout
            cropped = img[50:200, 700:1500]
        else:
            # Vertical/rolled document format
            cropped = img[30:150, 50:600]

        # NumPy clamps out-of-range slices, so a too-small image produces an
        # empty array here; cv2.imwrite would raise on it and stop the batch.
        if cropped.size == 0:
            print(f'Skipping {img_path.name}: crop window outside image ({progress:.1f}%)')
            continue

        # NOTE: output names are flat, so files with the same name in
        # different sub-folders overwrite one another.
        out_file = out_path / img_path.name
        if not cv2.imwrite(str(out_file), cropped):
            print(f'Skipping {img_path.name}: could not write output ({progress:.1f}%)')
            continue

        # Progress indication
        print(f'Processing {img_path.name}: {progress:.1f}%')

# Run the batch crop from 'raw_docs' into 'processed_docs'
batch_crop_documents(input_folder='raw_docs', output_folder='processed_docs')

To maximize throughput on multi-core systems, distribute the workload across processes:

import cv2
from pathlib import Path
from concurrent.futures import ProcessPoolExecutor
import multiprocessing

def process_single_image(args):
    """Crop one image and save it; designed to run inside a worker process.

    Args:
        args: ``(img_path, output_dir)`` tuple. ``img_path`` must expose a
            ``.name`` attribute (e.g. a ``pathlib.Path``); ``output_dir`` is
            the folder the cropped file is written to.

    Returns:
        ``None`` on success, otherwise a human-readable message describing
        why the image could not be processed. Errors are returned rather than
        raised so that one bad file does not abort the whole pool.
    """
    img_path, output_dir = args
    try:
        img = cv2.imread(str(img_path))
        if img is None:
            return f"Failed to load: {img_path}"

        width = img.shape[1]

        # Adaptive cropping logic
        if width > 1600:
            region = img[50:200, 700:1500]
        else:
            region = img[30:150, 50:600]

        # Out-of-range slices are clamped by NumPy and may come back empty
        if region.size == 0:
            return f"Error processing {img_path}: crop window lies outside the image"

        out_path = Path(output_dir) / img_path.name
        # imwrite reports failure through its return value, not an exception
        if not cv2.imwrite(str(out_path), region):
            return f"Error processing {img_path}: could not write {out_path}"
        return None
    except Exception as e:
        # Worker boundary: convert any failure into a message for the parent
        return f"Error processing {img_path}: {e}"

def parallel_crop(source, destination, max_workers=None):
    """Crop every JPG/PNG file directly inside *source* using a process pool.

    Args:
        source: Folder whose immediate ``.jpg``/``.png`` files are processed
            (suffix match is case-insensitive).
        destination: Output folder; created with any missing parents.
        max_workers: Pool size. ``None`` lets ``ProcessPoolExecutor`` choose
            its own default based on the machine's CPU count, which also
            respects the platform limit it enforces on Windows.

    Prints the number of failures followed by at most the first five
    failure messages.
    """
    src = Path(source)
    dst = Path(destination)
    dst.mkdir(parents=True, exist_ok=True)

    tasks = [
        (f, dst)
        for f in src.iterdir()
        if f.is_file() and f.suffix.lower() in ('.jpg', '.png')
    ]

    if tasks:
        with ProcessPoolExecutor(max_workers=max_workers) as executor:
            results = list(executor.map(process_single_image, tasks))
    else:
        # Nothing to do: avoid spawning worker processes for an empty batch
        results = []

    failures = [r for r in results if r is not None]
    print(f"Completed. Failures: {len(failures)}")
    for fail in failures[:5]:
        print(fail)

# Worker processes may re-import this module (spawn start method, the default
# on Windows and macOS). Without the guard each worker would launch the crop
# job again.
if __name__ == '__main__':
    parallel_crop('dataset/raw', 'dataset/cropped')

Channel Decomposition and Reconstruction

Color images consist of distinct channel layers that can be separated for independent analysis:

import cv2

bgr_img = cv2.imread('color_photo.jpg')
if bgr_img is not None:
    # One single-channel array per plane, unpacked in B, G, R sequence
    blue_plane, green_plane, red_plane = cv2.split(bgr_img)

    # Show each plane in its own titled window
    for window_title, plane in (
        ('Blue Component', blue_plane),
        ('Green Component', green_plane),
        ('Red Component', red_plane),
    ):
        cv2.imshow(window_title, plane)

    # Wait for a key press, then close all windows
    cv2.waitKey(0)
    cv2.destroyAllWindows()

Reconstructing channels requires maintaining proper ordering. The merge operation combines single-channel arrays into multi-channel representations:

# Stacking the planes in B, G, R sequence rebuilds the original image
reconstructed = cv2.merge((blue_plane, green_plane, red_plane))

# Passing them in R, G, B sequence instead yields an RGB-ordered image
rgb_version = cv2.merge((red_plane, green_plane, blue_plane))

All input arrays must share identical dimensions and data types. Mismatched geometries trigger runtime errors during the merge operation.

Boundary Extension Strategies

The copyMakeBorder function expands image dimensions by padding edges using various extrapolation methods:

import cv2

src = cv2.imread('sample.jpg')
# imread signals failure by returning None; stop with a clear message
# instead of an opaque error from copyMakeBorder
if src is None:
    raise IOError("Could not read image: sample.jpg")

# Border width in pixels, applied to top, bottom, left and right alike
pad = 50

# Constant value padding (black border)
constant_pad = cv2.copyMakeBorder(
    src, pad, pad, pad, pad,
    cv2.BORDER_CONSTANT, value=0
)

# Replicate edge pixels
replicate_pad = cv2.copyMakeBorder(
    src, pad, pad, pad, pad,
    cv2.BORDER_REPLICATE
)

# Reflect with edge pixel duplication
reflect_pad = cv2.copyMakeBorder(
    src, pad, pad, pad, pad,
    cv2.BORDER_REFLECT
)

# Reflect without edge duplication (mirror)
reflect101_pad = cv2.copyMakeBorder(
    src, pad, pad, pad, pad,
    cv2.BORDER_REFLECT_101
)

# Wrap around (cyclic)
wrap_pad = cv2.copyMakeBorder(
    src, pad, pad, pad, pad,
    cv2.BORDER_WRAP
)

For region filling applications, the flood fill algorithm replaces connected pixel regions:

import cv2
import numpy as np

canvas = cv2.imread('binary_mask.png')
# imread returns None on failure; check before touching canvas.shape
if canvas is None:
    raise IOError("Could not read image: binary_mask.png")

# The floodFill mask is built 2 pixels larger than the image in each dimension
mask = np.zeros((canvas.shape[0] + 2, canvas.shape[1] + 2), dtype=np.uint8)

# Fill from seed point with target color (canvas is modified in place)
cv2.floodFill(
    canvas,
    mask,
    seedPoint=(100, 100),
    newVal=(255, 255, 0),
    loDiff=(10, 10, 10),
    upDiff=(10, 10, 10)
)

Pixel Arithmetic and Image Fusion

OpenCV's addition operation saturates at 255 (maximum unsigned 8-bit value) rather than wrapping around:

import cv2
import numpy as np

# Two one-element uint8 operands whose true sum (270) exceeds the 8-bit range
mat_a = np.array([250], dtype=np.uint8)
mat_b = np.array([20], dtype=np.uint8)

# cv2.add clamps at 255; modular arithmetic would give 270 % 256 == 14
result = cv2.add(mat_a, mat_b)

Combining images of different sizes requires geometric normalization. The resize function adjusts dimensions using various interpolation algorithms:

foreground = cv2.imread('object.png')
background = cv2.imread('scene.jpg')
# imread returns None on failure; stop with a clear message instead of an
# AttributeError on `.shape` further down
if foreground is None or background is None:
    raise IOError("Could not read 'object.png' or 'scene.jpg'")

# Match foreground to background dimensions (resize takes (width, height))
target_h, target_w = background.shape[:2]
scaled_fg = cv2.resize(foreground, (target_w, target_h), interpolation=cv2.INTER_LINEAR)

# Alpha blending with weighting coefficients:
# 0.7 * background + 0.3 * scaled_fg + 10 per pixel
blended = cv2.addWeighted(
    background, 0.7,    # Background weight
    scaled_fg, 0.3,     # Foreground weight
    gamma=10            # Brightness offset
)

For non-uniform scaling, specify relative factors while setting output dimensions to zero:

# Stretch horizontally by 2x, compress vertically by 0.5x
# Width is scaled by fx (2x) and height by fy (0.5x); the all-zero dsize tells
# resize to derive the output size from those factors.
# NOTE(review): src_img is not defined in this snippet — presumably an image
# loaded earlier with cv2.imread; confirm before running.
anisotropic = cv2.resize(
    src_img, dsize=(0, 0), fx=2.0, fy=0.5, interpolation=cv2.INTER_CUBIC
)

Related Articles

Efficient Usage of HTTP Client in IntelliJ IDEA

IntelliJ IDEA incorporates a versatile HTTP client tool, enabling developers to interact with RESTful services and APIs effectively within the editor. This functionality streamlines workflows, replac...

Installing CocoaPods on macOS Catalina (10.15) Using a User-Managed Ruby

System Ruby on macOS 10.15 frequently fails to build native gems required by CocoaPods (for example, ffi), leading to errors like: ERROR: Failed to build gem native extension checking for ffi.h... no...

Resolve PhpStorm "Interpreter is not specified or invalid" on WAMP (Windows)

Symptom PhpStorm displays: "Interpreter is not specified or invalid. Press ‘Fix’ to edit your project configuration." This occurs when the IDE cannot locate a valid PHP CLI executable or when the debu...

Leave a Comment

Anonymous

◎Feel free to join the discussion and share your thoughts.