Core Training Mechanics in PyTorch: From Manual Gradients to Modular Networks
Manual Gradient Computation and Parameter Updates
To implement linear regression from scratch, define learnable parameters with gradient tracking enabled:
import torch

# Initialize learnable parameters with gradient tracking enabled
define_weights = torch.normal(0.0, 0.01, size=(2, 1), requires_grad=True)
bias_term = torch.zeros(1, requires_grad=True)

def compute_prediction(data, weights, bias):
    # Linear model: y = Xw + b
    return torch.mm(data, weights) + bias

def compute_mse_loss(pred, true):
    # Elementwise squared error; reshape targets to match predictions
    true = true.view(pred.shape)
    return 0.5 * (pred - true) ** 2

def apply_sgd_step(params, learning_rate, batch_count):
    # Update parameters in place, outside the autograd graph
    with torch.no_grad():
        for p in params:
            p -= learning_rate * p.grad / batch_count
            p.grad.zero_()  # Reset gradients so the next backward() starts fresh
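The mini-batch loop that follows calls a data_iter helper and references features and labels, none of which are defined above. A minimal sketch under those assumptions (true_w, true_b, and the noise scale are hypothetical, chosen only so the regression has parameters to recover; the data_iter signature matches the call site below):

# Hypothetical synthetic data for the regression demo
true_w = torch.tensor([[2.0], [-3.4]])
true_b = 4.2
features = torch.randn(1000, 2)
labels = torch.mm(features, true_w) + true_b + 0.01 * torch.randn(1000, 1)

def data_iter(batch_size, X, y):
    # Yield shuffled mini-batches of (features, labels)
    indices = torch.randperm(X.shape[0])
    for start in range(0, X.shape[0], batch_size):
        batch_idx = indices[start:start + batch_size]
        yield X[batch_idx], y[batch_idx]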
Train using mini-batches:
learning_rate = 0.03
training_cycles = 3

for cycle in range(training_cycles):
    for batch_x, batch_y in data_iter(batch_size=10, X=features, y=labels):
        y_pred = compute_prediction(batch_x, define_weights, bias_term)
        batch_loss = compute_mse_loss(y_pred, batch_y)
        batch_loss.sum().backward()  # Accumulate gradients into .grad
        apply_sgd_step([define_weights, bias_term], learning_rate, 10)
    # Evaluate full training loss
    with torch.no_grad():
        full_pred = compute_prediction(features, define_weights, bias_term)
        epoch_loss = compute_mse_loss(full_pred, labels).mean()
        print(f'Cycle {cycle + 1}, Mean Loss: {epoch_loss.item():.6f}')
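The manual apply_sgd_step mirrors what torch.optim.SGD computes. As a sanity check, one training cycle can be rewritten with the built-in optimizer (a sketch; taking the mean of the loss divides by the batch size, which replaces the explicit division by batch_count):

optimizer = torch.optim.SGD([define_weights, bias_term], lr=learning_rate)
for batch_x, batch_y in data_iter(batch_size=10, X=features, y=labels):
    optimizer.zero_grad()
    batch_loss = compute_mse_loss(
        compute_prediction(batch_x, define_weights, bias_term), batch_y
    ).mean()  # mean() averages over the batch, matching apply_sgd_step's scaling
    batch_loss.backward()
    optimizer.step()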
Structured Neural Network Training Pipeline
1. Essential Imports
import torch
import torch.nn as nn
import torch.optim as optim
2. Synthetic Dataset Construction
total_instances = 10000
input_features = torch.randn(total_instances, 2)
# Labels are drawn independently of the features, so any classifier will
# hover near chance (~50%) accuracy; the data only exercises the pipeline.
output_labels = torch.randint(0, 2, (total_instances,), dtype=torch.float32)

# 80/20 train/test split
train_x, train_y = input_features[:8000], output_labels[:8000]
test_x, test_y = input_features[8000:], output_labels[8000:]
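The loop in step 5 trains on train_x in full batches; for larger datasets the same split can be wrapped in a DataLoader to restore mini-batching (a sketch only, not used by the loop below):

from torch.utils.data import TensorDataset, DataLoader

train_dataset = TensorDataset(train_x, train_y)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)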
3. Model Definition Using nn.Module
class BinaryClassifier(nn.Module):
    def __init__(self):
        super().__init__()
        self.dense_layer = nn.Linear(2, 1)  # two input features -> one logit

    def forward(self, x):
        # Sigmoid maps the logit to a probability in (0, 1)
        return torch.sigmoid(self.dense_layer(x))
model_instance = BinaryClassifier()
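A quick shape sanity check with a hypothetical probe batch of 4 instances: the model maps (4, 2) features to (4, 1) probabilities, which is why the training loop below calls squeeze().

probe = model_instance(torch.randn(4, 2))
print(probe.shape)  # torch.Size([4, 1])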
4. Loss and Optimizer Setup
loss_function = nn.BCELoss()
optimizer_instance = optim.Adam(model_instance.parameters(), lr=0.01)
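Because the model applies sigmoid inside forward, nn.BCELoss is the matching loss. A numerically more stable alternative is to return the raw logit from forward and pair it with nn.BCEWithLogitsLoss, which fuses the sigmoid into the loss computation:

# Alternative setup (requires forward to return self.dense_layer(x) directly)
logit_loss = nn.BCEWithLogitsLoss()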
5. Training Loop with Gradient Management
epochs = 1000
for step in range(epochs):
    optimizer_instance.zero_grad()       # Clear gradients from the previous step
    outputs = model_instance(train_x)
    error = loss_function(outputs.squeeze(), train_y)
    error.backward()                     # Backpropagate through the graph
    optimizer_instance.step()            # Apply the Adam update
    if (step + 1) % 100 == 0:
        print(f'Step {step + 1}, BCE Loss: {error.item():.5f}')
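After training, the learned parameters can be checkpointed and restored via the state dict (the filename here is arbitrary):

torch.save(model_instance.state_dict(), 'binary_classifier.pt')
# Later: model_instance.load_state_dict(torch.load('binary_classifier.pt'))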
6. Inference and Accuracy Assessment
model_instance.eval()  # Switch to evaluation mode (good practice, though this model has no dropout/batchnorm)
with torch.no_grad():
    test_outputs = model_instance(test_x)
    binary_predictions = (test_outputs.squeeze() > 0.5).float()
    accuracy_score = (binary_predictions == test_y).float().mean().item()
    print(f'Test Accuracy: {accuracy_score:.4f}')