Automated Diabetic Retinopathy Grading via Deep Neural Networks and Traditional Machine Learning
Dataset Characteristics and Preprocessing
The dataset comprises 1000 fundus photographs categorized into four severity levels of diabetic retinopathy. Initial exploratory analysis revealed significant class imbalance across the severity grades. To mitigate this, targeted augmentation strategies were implemented: horizontal and vertical flipping for the underrepresented moderate and severe classes, while preserving the original distribution of normal and mild cases. The final split allocated 70% for training and 30% for validation.
Convolutional Neural Network Implementation
For feature extraction and classification, a transfer learning approach was adopted utilizing pretrained architectures from the torchvision repository. The ResNet101 architecture was selected as the primary backbone due to its deep residual connections, initialized with ImageNet weights.
from torchvision import models
import torch.nn as nn

# Initialize a ResNet-101 backbone with ImageNet-1k pretrained weights.
# NOTE: the `pretrained=True` flag is deprecated since torchvision 0.13;
# the weights enum below is the supported replacement and loads the same
# ImageNet checkpoint.
backbone = models.resnet101(weights=models.ResNet101_Weights.IMAGENET1K_V1)

# Replace the 1000-way ImageNet classifier head with a 4-class grading head.
# Dropout(0.4) regularizes the freshly initialized linear layer.
input_dim = backbone.fc.in_features
backbone.fc = nn.Sequential(
    nn.Dropout(0.4),
    nn.Linear(input_dim, 4),
)
Training Configuration
The optimization pipeline utilized AdamW optimizer with weight decay regularization and a cosine annealing learning rate scheduler.
import torch
from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingLR

# Prefer GPU execution when available; otherwise run on CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
backbone = backbone.to(device)

# All tunable training settings collected in one place.
hyperparameters = {
    'initial_lr': 5e-5,
    'batch_size': 16,
    'epochs': 25,
    'weight_decay': 1e-4,
}

# AdamW applies decoupled weight-decay regularization.
optimizer = AdamW(
    backbone.parameters(),
    lr=hyperparameters['initial_lr'],
    weight_decay=hyperparameters['weight_decay'],
)

# Standard multi-class loss over the 4 severity grades.
criterion = nn.CrossEntropyLoss()

# Smoothly anneal the learning rate over the full training run.
scheduler = CosineAnnealingLR(optimizer, T_max=hyperparameters['epochs'])
Data Loading Pipeline
A custom PyTorch Dataset class was constructed to handle the fundus image loading and preprocessing:
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import os
import pandas as pd
from torchvision import transforms
class FundusDataset(Dataset):
    """Fundus photograph dataset backed by a CSV annotation file.

    The CSV's first column holds the image identifier (without the ".png"
    extension) and the second column the integer severity grade.
    """

    def __init__(self, annotations_file, img_dir, transform=None):
        self.annotations = pd.read_csv(annotations_file)
        self.image_directory = img_dir
        self.transform = transform

    def __len__(self):
        # Number of annotated samples (one CSV row per image).
        return self.annotations.shape[0]

    def __getitem__(self, idx):
        row = self.annotations.iloc[idx]
        img_path = os.path.join(self.image_directory, row.iloc[0] + ".png")
        image = Image.open(img_path).convert('RGB')
        label = torch.tensor(row.iloc[1], dtype=torch.long)
        if self.transform is not None:
            image = self.transform(image)
        return image, label
# Preprocessing pipeline: resize, light augmentation, tensor conversion,
# and ImageNet channel statistics (matching the pretrained backbone).
augmentation_steps = [
    transforms.Resize((512, 512)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(15),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
]
preprocessing = transforms.Compose(augmentation_steps)

# Training dataset and loader built on the custom Dataset above.
train_data = FundusDataset(
    annotations_file="train_labels.csv",
    img_dir="/data/fundus_images/",
    transform=preprocessing,
)
train_loader = DataLoader(
    train_data,
    batch_size=hyperparameters['batch_size'],
    shuffle=True,
    num_workers=4,  # parallel workers for image decoding
)
Training Loop
from tqdm import tqdm
def training_epoch(model, loader, optimizer, criterion, device):
    """Run one full optimization pass over `loader`.

    Returns the mean per-sample loss for the epoch.
    """
    model.train()
    total_loss = 0.0
    for batch_inputs, batch_targets in tqdm(loader, desc="Training"):
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)
        optimizer.zero_grad()
        loss = criterion(model(batch_inputs), batch_targets)
        loss.backward()
        optimizer.step()
        # Weight by batch size so the final average is per-sample,
        # even when the last batch is smaller.
        total_loss += loss.item() * batch_inputs.size(0)
    return total_loss / len(loader.dataset)
# Main training loop: one optimization pass per epoch, then step the
# cosine-annealing schedule and report the running loss.
n_epochs = hyperparameters['epochs']
for epoch in range(1, n_epochs + 1):
    epoch_loss = training_epoch(backbone, train_loader, optimizer, criterion, device)
    scheduler.step()
    print(f"Epoch {epoch}/{n_epochs} - Loss: {epoch_loss:.4f}")
Ensemble Evaluation Strategy
To enhance classification robustness, a hybrid ensemble approach combining ResNet101 and DenseNet121 was implemented. The ensemble logic leverages the complementary strengths of both architectures, with ResNet excelling at normal case detection and DenseNet providing superior granularity for proliferative cases.
def evaluate_ensemble(model_a, model_b, loader, device):
    """Evaluate a two-model weighted ensemble and print per-class accuracy.

    Args:
        model_a: first classifier (weight 0.6 in the ensemble vote).
        model_b: second classifier (weight 0.4).
        loader: DataLoader yielding (images, labels) batches; labels are
            integer severity grades in 0..3.
        device: torch device the models live on.

    Prints per-severity-level accuracy and overall accuracy.
    """
    model_a.eval()
    model_b.eval()
    class_correct = {i: 0 for i in range(4)}
    class_total = {i: 0 for i in range(4)}
    with torch.no_grad():
        for images, labels in tqdm(loader):
            images = images.to(device)
            labels = labels.to(device)
            # FIX: average softmax probabilities rather than raw logits.
            # Logit scales are not comparable across different architectures
            # (ResNet vs DenseNet), so weighting unnormalized outputs lets
            # the model with larger logit magnitudes dominate the vote.
            probs_a = torch.softmax(model_a(images), dim=1)
            probs_b = torch.softmax(model_b(images), dim=1)
            # Weighted averaging ensemble
            ensemble_outputs = 0.6 * probs_a + 0.4 * probs_b
            _, predictions = ensemble_outputs.max(1)
            for cls in range(4):
                mask = (labels == cls)
                class_total[cls] += mask.sum().item()
                class_correct[cls] += ((predictions == cls) & mask).sum().item()
    for cls in range(4):
        accuracy = class_correct[cls] / class_total[cls] if class_total[cls] > 0 else 0
        print(f"Severity Level {cls}: {accuracy:.2%} ({class_correct[cls]}/{class_total[cls]})")
    overall_acc = sum(class_correct.values()) / sum(class_total.values())
    print(f"Overall Accuracy: {overall_acc:.2%}")
Tabular Feature Analysis
Beyond image-based deep learning, traditional machine learning approaches were applied to the Messidor dataset's extracted features. The dataset provides 19 quantitative features representing lesion characteristics and anatomical descriptors, together with a binary class label.
from scipy.io import arff
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_curve, auc
import matplotlib.pyplot as plt
# Load ARFF format
raw_data, meta = arff.loadarff('messidor_features.arff')
feature_df = pd.DataFrame(raw_data)

# Binary classification: presence vs absence of retinopathy
target_column = 'target'
feature_columns = [f'feature_{i}' for i in range(19)]
# Targets arrive as ARFF nominal values; coerce to plain ints.
feature_df[target_column] = feature_df[target_column].astype(int)

X = feature_df[feature_columns]
y = feature_df[target_column]

# Stratified 80/20 split keeps the class ratio identical in both folds.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y,
)

# L2-regularized logistic regression; balanced class weights offset
# any residual label skew in the training fold.
clf = LogisticRegression(
    penalty='l2',
    C=0.1,
    max_iter=5000,
    solver='lbfgs',
    class_weight='balanced',
)
clf.fit(X_train, y_train)

predictions = clf.predict(X_test)
print(classification_report(y_test, predictions))
Performance Visualization
The Receiver Operating Characteristic (ROC) curve and feature importance analysis provide interpretability for the clinical decision support system.
# ROC Analysis
# Positive-class probabilities drive the threshold sweep.
probabilities = clf.predict_proba(X_test)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, probabilities)
roc_auc = auc(fpr, tpr)

plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (AUC = {roc_auc:.2f})')
# Diagonal reference line = chance-level classifier.
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Diabetic Retinopathy Detection ROC')
plt.legend(loc="lower right")
plt.show()

# Feature importance visualization
# Rank features by coefficient magnitude while plotting the signed values,
# so direction of association is still visible.
coefficients = pd.DataFrame({
    'Feature': feature_columns,
    'Coefficient': clf.coef_[0],
})
coefficients['Abs_Coeff'] = coefficients['Coefficient'].abs()
coefficients.sort_values('Abs_Coeff', ascending=True, inplace=True)

plt.barh(coefficients['Feature'], coefficients['Coefficient'], color='steelblue')
plt.xlabel('Regression Coefficient')
plt.title('Feature Importance for Retinopathy Prediction')
plt.tight_layout()
plt.show()