Fundamental Image Processing Techniques Using OpenCV and Python
Core Image I/O and Channel Manipulation
Loading visual data and managing color channels are foundational steps. OpenCV defaults to BGR ordering, requiring explicit channel management for RGB workflows.
import cv2
import numpy as np
def demonstrate_channel_operations():
    """Show an image next to a copy that keeps only its red channel.

    Reads ``sample_image.png`` as a colour image and returns silently when
    it cannot be loaded. Otherwise two windows are displayed until a key
    is pressed, after which all windows are closed.
    """
    image = cv2.imread("sample_image.png", cv2.IMREAD_COLOR)
    if image is None:
        return

    # cv2.split yields the planes in B, G, R order; only red is used below.
    _, _, red_plane = cv2.split(image)

    # An all-zero plane stands in for the blue and green channels.
    empty_plane = np.zeros(image.shape[:2], dtype=np.uint8)
    red_only = cv2.merge([empty_plane, empty_plane, red_plane])

    for title, picture in (
        ("Original", image),
        ("Red Channel Isolated", red_only),
    ):
        cv2.imshow(title, picture)

    cv2.waitKey(0)
    cv2.destroyAllWindows()
# Script entry point: run the channel demo only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    demonstrate_channel_operations()
Pixel Arithmetic and Weighted Blending
Standard addition operators (+) in NumPy perform modulo arithmetic, causing wrap-around effects when pixel values exceed 255. OpenCV's add function saturates values at the maximum limit. For combining images with varying intensities, weighted blending is preferred.
import cv2
def blend_and_add():
    """Display the saturated sum and a 60/40 weighted blend of two images.

    Loads ``scene1.png`` and ``scene2.png`` in colour and returns silently
    if either one cannot be read. The second image is resized to the first
    image's dimensions before the two are combined.
    """
    first = cv2.imread("scene1.png", cv2.IMREAD_COLOR)
    second = cv2.imread("scene2.png", cv2.IMREAD_COLOR)
    if first is None or second is None:
        return

    # cv2.resize takes the target size as (width, height).
    rows, cols = first.shape[:2]
    second = cv2.resize(second, (cols, rows))

    # cv2.add clips at the type maximum instead of wrapping around.
    clipped_sum = cv2.add(first, second)
    # 0.6 * first + 0.4 * second, with no extra offset.
    mixed = cv2.addWeighted(first, 0.6, second, 0.4, 0)

    cv2.imshow("Saturated Addition", clipped_sum)
    cv2.imshow("Weighted Blend", mixed)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
Spatial Filtering for Noise Reduction
Smoothing filters apply convolution kernels to reduce high-frequency noise. The choice of filter depends on the noise characteristics.
import cv2
def apply_smoothing_filters():
    """Display box, Gaussian and median smoothing of ``noisy_photo.png``.

    Returns silently when the image cannot be loaded. Each filtered result
    gets its own window; all windows close after a key press.
    """
    noisy = cv2.imread("noisy_photo.png", cv2.IMREAD_COLOR)
    if noisy is None:
        return

    results = [
        # Uniform 5x5 averaging kernel.
        ("Box Filtered", cv2.blur(noisy, (5, 5))),
        # 7x7 Gaussian kernel; sigmaX=0 lets OpenCV derive sigma from the size.
        ("Gaussian Filtered", cv2.GaussianBlur(noisy, (7, 7), 0)),
        # Median over a 9x9 neighbourhood, suited to salt-and-pepper noise.
        ("Median Filtered", cv2.medianBlur(noisy, 9)),
    ]
    for window_title, filtered in results:
        cv2.imshow(window_title, filtered)

    cv2.waitKey(0)
    cv2.destroyAllWindows()
Morphological Processing
Morphological operations rely on a structuring element to probe the image structure. Basic erosion and dilation form the foundation for advanced techniques like opening and closing.
import cv2
import numpy as np
def morphological_transforms():
    """Display opening, closing and morphological gradient of a binary mask.

    Loads ``binary_input.png`` as grayscale, binarises it at 127 and applies
    three morphological operations with a 5x5 all-ones structuring element.
    Returns silently when the image cannot be loaded.
    """
    loaded = cv2.imread("binary_input.png", cv2.IMREAD_GRAYSCALE)
    # The load check must come before thresholding: cv2.imread returns None
    # on failure, and handing None to cv2.threshold raises instead of
    # letting the function bail out quietly.
    if loaded is None:
        return

    _, binary_mask = cv2.threshold(loaded, 127, 255, cv2.THRESH_BINARY)
    structuring_elem = np.ones((5, 5), dtype=np.uint8)

    # Opening: erosion followed by dilation (removes small noise).
    cleaned = cv2.morphologyEx(binary_mask, cv2.MORPH_OPEN, structuring_elem)
    # Closing: dilation followed by erosion (fills small holes).
    filled = cv2.morphologyEx(binary_mask, cv2.MORPH_CLOSE, structuring_elem)
    # Gradient: dilation minus erosion (highlights boundaries).
    boundary_map = cv2.morphologyEx(
        binary_mask, cv2.MORPH_GRADIENT, structuring_elem
    )

    cv2.imshow("Opened", cleaned)
    cv2.imshow("Closed", filled)
    cv2.imshow("Gradient Edges", boundary_map)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
Gradient Computation and Edge Detection
Derivative filters estimate intensity changes to locate structural boundaries. First-order operators like Sobel and Scharr compute gradients, while the Laplacian measures second-order derivatives. The Canny algorithm combines gradient magnitude with hysteresis thresholding for robust edge extraction.
import cv2
def detect_edges_and_gradients():
    """Display a combined Sobel gradient image and Canny edges.

    Loads ``landscape.jpg`` as grayscale and returns silently when it
    cannot be read. Windows stay open until a key is pressed.
    """
    gray = cv2.imread("landscape.jpg", cv2.IMREAD_GRAYSCALE)
    if gray is None:
        return

    # First derivative along x, then along y, computed in 64-bit float and
    # converted back to 8-bit absolute values.
    abs_x, abs_y = (
        cv2.convertScaleAbs(cv2.Sobel(gray, cv2.CV_64F, dx, dy, ksize=3))
        for dx, dy in ((1, 0), (0, 1))
    )
    # Equal-weight mix of the two directional responses.
    sobel_mix = cv2.addWeighted(abs_x, 0.5, abs_y, 0.5, 0)

    # Canny with hysteresis thresholds 100 (low) and 200 (high).
    edges = cv2.Canny(gray, 100, 200)

    cv2.imshow("Sobel Combined", sobel_mix)
    cv2.imshow("Canny Edges", edges)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
Image Pyramids and Multi-Scale Representation
Downsampling and upsampling create multi-resolution representations. Gaussian pyramids are built using pyrDown and pyrUp. Subtracting an upsampled lower-resolution layer from the original generates the Laplacian pyramid, capturing detail information.
import cv2
def construct_pyramids():
    """Display the first pyramid level and a detail image of ``target.png``.

    The image is downsampled twice with ``pyrDown``. The second level is
    upsampled once with ``pyrUp`` and then resized to the original
    dimensions, and that approximation is subtracted from the original.
    Returns silently when the image cannot be loaded.
    """
    full_res = cv2.imread("target.png", cv2.IMREAD_COLOR)
    if full_res is None:
        return

    half_res = cv2.pyrDown(full_res)
    quarter_res = cv2.pyrDown(half_res)

    # One pyrUp step, then a resize to the exact source size; cv2.resize
    # takes the target as (width, height).
    rows, cols = full_res.shape[:2]
    approximation = cv2.resize(cv2.pyrUp(quarter_res), (cols, rows))

    # Saturating subtraction: what the blurred approximation lost.
    detail = cv2.subtract(full_res, approximation)

    cv2.imshow("Downsampled Level 1", half_res)
    cv2.imshow("Laplacian Detail", detail)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
Contour Extraction and Geometric Analysis
Contours represent continuous curves connecting boundary points. They require a binary input, typically obtained via thresholding or edge detection.
import cv2
def extract_contours():
    """Display the binarised ``shapes.png`` and its outer contours.

    The image is converted to grayscale, thresholded at 100, and its
    external contours are drawn in green on a copy of the original.
    Returns silently when the image cannot be loaded.
    """
    original = cv2.imread("shapes.png", cv2.IMREAD_COLOR)
    # cv2.imread returns None on failure; without this guard cv2.cvtColor
    # would raise on a missing or unreadable file.
    if original is None:
        return

    gray_scale = cv2.cvtColor(original, cv2.COLOR_BGR2GRAY)
    _, binarized = cv2.threshold(gray_scale, 100, 255, cv2.THRESH_BINARY)

    # Retrieve external contours only. OpenCV 3.x returns
    # (image, contours, hierarchy) while 4.x returns (contours, hierarchy);
    # taking the last two items works with both.
    contours, _ = cv2.findContours(
        binarized, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2:]

    # Overlay all contours (index -1) in green, 2 px thick, on a copy so
    # the loaded image stays untouched.
    contour_overlay = original.copy()
    cv2.drawContours(contour_overlay, contours, -1, (0, 255, 0), 2)

    cv2.imshow("Binarized", binarized)
    cv2.imshow("Detected Contours", contour_overlay)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
Histogram Analysis and Equalization
Histograms quantify pixel intensity distribution. Global histogram equalization stretches the dynamic range to enhance contrast, which is particularly useful for underexposed images.
import cv2
import matplotlib.pyplot as plt
def analyze_and_equalize_histogram():
    """Plot histograms before and after equalisation, then show the result.

    Loads ``dark_scene.png`` as grayscale and returns silently when it
    cannot be read. The Matplotlib figure is shown first; the equalised
    image is displayed in an OpenCV window afterwards.
    """
    gray = cv2.imread("dark_scene.png", cv2.IMREAD_GRAYSCALE)
    if gray is None:
        return

    # 256-bin histogram over [0, 256) of the single channel, no mask.
    before = cv2.calcHist([gray], [0], None, [256], [0, 256])
    enhanced = cv2.equalizeHist(gray)
    after = cv2.calcHist([enhanced], [0], None, [256], [0, 256])

    plt.figure(figsize=(10, 4))
    panels = (
        (1, before, 'gray', "Original Distribution"),
        (2, after, 'black', "Equalized Distribution"),
    )
    for slot, counts, line_color, heading in panels:
        plt.subplot(1, 2, slot)
        plt.plot(counts, color=line_color)
        plt.title(heading)
        plt.grid(True)
    plt.show()

    cv2.imshow("Contrast Enhanced", enhanced)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
Region Masking and Bitwise Logic
Masks define areas of interest for localized processing. Bitwise operations restrict computations or visualizations to specific rectangular or irregular regions.
import cv2
import numpy as np
def apply_region_masking():
    """Display a square mask and the part of the image it selects.

    Loads ``background.jpg`` as grayscale and returns silently when it
    cannot be read. Pixels outside the mask come out as zero.
    """
    picture = cv2.imread("background.jpg", cv2.IMREAD_GRAYSCALE)
    if picture is None:
        return

    # Start from an all-black mask and switch on rows/cols 50..149.
    rows, cols = picture.shape[:2]
    mask = np.zeros((rows, cols), dtype=np.uint8)
    mask[50:150, 50:150] = 255

    # AND-ing the image with itself keeps pixel values where the mask is set.
    selected = cv2.bitwise_and(picture, picture, mask=mask)

    cv2.imshow("Defined Mask", mask)
    cv2.imshow("Extracted ROI", selected)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
Visualization with Matplotlib
Matplotlib expects RGB color ordering and specific normalization. Converting color spaces or handling grayscale colormaps ensures accurate on-screen rendering.
import cv2
import matplotlib.pyplot as plt
def render_with_matplotlib():
    """Show BGR vs RGB and default vs gray colormap renderings in a 2x2 grid.

    Loads ``color_sample.jpg`` and returns silently when it cannot be read.
    """
    bgr = cv2.imread("color_sample.jpg")
    if bgr is None:
        return

    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)

    # (image, title, colormap) for each cell; None keeps Matplotlib's default.
    cells = [
        (bgr, "OpenCV BGR (Incorrect Colors)", None),
        (rgb, "Corrected RGB", None),
        (gray, "Grayscale Raw", None),
        (gray, "Grayscale Mapped", 'gray'),
    ]

    plt.figure(figsize=(8, 6))
    for position, (picture, heading, colormap) in enumerate(cells, start=1):
        plt.subplot(2, 2, position)
        plt.imshow(picture, cmap=colormap)
        plt.title(heading)
        plt.axis('off')
    plt.tight_layout()
    plt.show()