Overview
The optimizers module provides factory functions for creating PyTorch optimizers, learning rate schedulers, and loss functions based on configuration dictionaries. It also includes a custom Focal Loss implementation for handling class imbalance.
Classes
FocalLoss
Focal Loss implementation for imbalanced multi-class classification. Reduces loss contribution from easy examples and focuses on hard examples.
class FocalLoss(nn.Module):
    def __init__(
        self,
        alpha: torch.Tensor | None = None,
        gamma: float = 2.0,
        reduction: str = "mean"
    )
alpha
torch.Tensor
default:"None"
Class weights tensor of shape (num_classes,). Used to handle class imbalance.
gamma
float
default:"2.0"
Focusing parameter. Higher values down-weight easy examples more. Typical range: [0, 5]
reduction
str
default:"mean"
Specifies the reduction to apply: "none", "mean", or "sum"
Formula:
FL(pt) = -alpha * (1 - pt)^gamma * log(pt)
where pt = exp(-CE_loss)
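The formula maps onto code roughly as follows. This is a minimal sketch based only on the formula above (FocalLossSketch is a hypothetical name used for illustration), not the module's actual implementation.
import torch
import torch.nn as nn
import torch.nn.functional as F

class FocalLossSketch(nn.Module):
    # Illustrative focal loss: FL = (1 - pt)^gamma * CE, with pt = exp(-CE)
    def __init__(self, alpha: torch.Tensor | None = None, gamma: float = 2.0, reduction: str = "mean"):
        super().__init__()
        self.alpha = alpha          # per-class weights, shape (num_classes,)
        self.gamma = gamma          # focusing parameter
        self.reduction = reduction

    def forward(self, logits: torch.Tensor, targets: torch.Tensor) -> torch.Tensor:
        # Per-sample cross entropy; alpha enters as the class weight when provided
        ce = F.cross_entropy(logits, targets, weight=self.alpha, reduction="none")
        pt = torch.exp(-ce)                      # pt = exp(-CE_loss)
        focal = (1 - pt) ** self.gamma * ce      # down-weight easy examples
        if self.reduction == "mean":
            return focal.mean()
        if self.reduction == "sum":
            return focal.sum()
        return focal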
When to use:
- Highly imbalanced datasets
- When many examples are easy to classify
- When you want to focus training on hard examples
Example
import torch
import torch.nn as nn
from training.optimizers import FocalLoss
from training.dataset import compute_class_weights
# Compute class weights
class_weights = compute_class_weights(train_labels, num_classes=9)
# Create focal loss
criterion = FocalLoss(
    alpha=class_weights,
    gamma=2.0,
    reduction="mean"
)
# Use in training
outputs = model(inputs)
loss = criterion(outputs, targets)
loss.backward()
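Like nn.CrossEntropyLoss, the criterion in the example above is called on raw logits and integer class targets (this follows the standard PyTorch convention and is an assumption about FocalLoss's interface). A tiny standalone check with illustrative shapes:
import torch
from training.optimizers import FocalLoss

logits = torch.randn(4, 9)            # (batch_size, num_classes): raw model outputs
targets = torch.tensor([0, 3, 3, 8])  # (batch_size,): integer class indices
criterion = FocalLoss(gamma=2.0)      # no alpha: unweighted focal loss
loss = criterion(logits, targets)
print(loss.item())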
Functions
create_optimizer
Creates a PyTorch optimizer from training configuration.
def create_optimizer(model: nn.Module, config: dict) -> torch.optim.Optimizer
model
nn.Module
required
PyTorch model whose parameters will be optimized
config
dict
required
Training configuration dictionary with keys:
optimizer: Optimizer name ("Adam", "AdamW", "SGD with Momentum", "RMSprop")
learning_rate: Learning rate (default: 0.001)
l2_decay: Whether to enable L2 regularization (default: False)
l2_lambda: L2 regularization coefficient (default: 0.0001)
Returns: Configured optimizer instance
Supported Optimizers:
| Optimizer | Description | Best For |
|---|---|---|
| Adam | Adaptive learning rate with momentum | General purpose, default choice |
| AdamW | Adam with decoupled weight decay | Better generalization than Adam |
| SGD with Momentum | Classic SGD with momentum=0.9 | Large batch training, simple models |
| RMSprop | Adaptive learning rate | RNNs, online learning |
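To make the config-to-optimizer mapping concrete, here is a minimal sketch of what such a factory could look like. It is an illustration based on the table and defaults above, not the module's actual source; in particular, mapping l2_lambda to each constructor's weight_decay argument and momentum=0.9 for SGD are taken from the documented behavior.
import torch
import torch.nn as nn

def create_optimizer_sketch(model: nn.Module, config: dict) -> torch.optim.Optimizer:
    # Illustrative only: mirrors the documented defaults, not the module's source.
    lr = config.get("learning_rate", 0.001)
    weight_decay = config.get("l2_lambda", 0.0001) if config.get("l2_decay", False) else 0.0
    name = config.get("optimizer", "Adam")
    if name == "Adam":
        return torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    if name == "AdamW":
        return torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
    if name == "SGD with Momentum":
        return torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=weight_decay)
    if name == "RMSprop":
        return torch.optim.RMSprop(model.parameters(), lr=lr, weight_decay=weight_decay)
    raise ValueError(f"Unknown optimizer: {name}")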
Example
from training.optimizers import create_optimizer
# Basic Adam optimizer
config = {
    "optimizer": "Adam",
    "learning_rate": 0.001,
    "l2_decay": False
}
optimizer = create_optimizer(model, config)
# AdamW with weight decay
config = {
    "optimizer": "AdamW",
    "learning_rate": 0.0001,
    "l2_decay": True,
    "l2_lambda": 0.01
}
optimizer = create_optimizer(model, config)
# SGD with momentum and L2 regularization
config = {
    "optimizer": "SGD with Momentum",
    "learning_rate": 0.01,
    "l2_decay": True,
    "l2_lambda": 0.0001
}
optimizer = create_optimizer(model, config)
create_scheduler
Creates a learning rate scheduler from training configuration.
def create_scheduler(
    optimizer: torch.optim.Optimizer,
    config: dict,
    steps_per_epoch: int,
) -> torch.optim.lr_scheduler.LRScheduler | None
optimizer
torch.optim.Optimizer
required
Optimizer instance to schedule
config
dict
required
Training configuration dictionary with keys:
lr_strategy: Scheduler name ("Constant", "ReduceLROnPlateau", "Cosine Annealing", "Step Decay", "Exponential Decay")
epochs: Total number of training epochs
steps_per_epoch
int
required
Number of training steps per epoch (length of train DataLoader)
Returns: Configured scheduler instance or None if using constant learning rate
Supported Schedulers:
| Scheduler | Description | Parameters |
|---|---|---|
| Constant | No scheduling | None |
| ReduceLROnPlateau | Reduce LR when metric plateaus | factor=0.5, patience=5, min_lr=1e-6 |
| Cosine Annealing | Cosine decay to minimum | T_max=epochs, eta_min=1e-6 |
| Step Decay | Reduce LR at fixed intervals | step_size=epochs/3, gamma=0.1 |
| Exponential Decay | Exponential decay | gamma=0.95 |
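As an illustration of how the table above could map onto torch.optim.lr_scheduler classes, here is a minimal sketch. It is not the module's actual source, and the real factory may use steps_per_epoch (for example, for per-step schedules) in ways not shown here.
import torch

def create_scheduler_sketch(optimizer, config, steps_per_epoch):
    # Illustrative mapping of lr_strategy to schedulers, using the parameters from the table above.
    strategy = config.get("lr_strategy", "Constant")
    epochs = config.get("epochs", 100)
    if strategy == "Constant":
        return None                      # constant LR: no scheduler
    if strategy == "ReduceLROnPlateau":
        return torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.5, patience=5, min_lr=1e-6)
    if strategy == "Cosine Annealing":
        return torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs, eta_min=1e-6)
    if strategy == "Step Decay":
        return torch.optim.lr_scheduler.StepLR(optimizer, step_size=max(epochs // 3, 1), gamma=0.1)
    if strategy == "Exponential Decay":
        return torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.95)
    raise ValueError(f"Unknown lr_strategy: {strategy}")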
Example
from training.optimizers import create_optimizer, create_scheduler
optimizer = create_optimizer(model, training_config)
# Constant learning rate
config = {"lr_strategy": "Constant"}
scheduler = create_scheduler(optimizer, config, steps_per_epoch=100)
# Returns None
# ReduceLROnPlateau - reduces LR when validation loss plateaus
config = {
    "lr_strategy": "ReduceLROnPlateau",
    "epochs": 100
}
scheduler = create_scheduler(optimizer, config, steps_per_epoch=100)
# Call with: scheduler.step(val_loss)
# Cosine Annealing - smooth decay
config = {
    "lr_strategy": "Cosine Annealing",
    "epochs": 100
}
scheduler = create_scheduler(optimizer, config, steps_per_epoch=100)
# Call with: scheduler.step()
# Step Decay - drops LR at fixed intervals
config = {
    "lr_strategy": "Step Decay",
    "epochs": 90
}
scheduler = create_scheduler(optimizer, config, steps_per_epoch=100)
# Reduces LR by 0.1x every 30 epochs
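Note that the schedulers are stepped differently: ReduceLROnPlateau needs the monitored validation metric, while the epoch-based schedules take no argument. A minimal loop sketch tying this together (train_one_epoch and validate are hypothetical helpers; model, dataloaders, optimizer, and criterion as in the examples above):
import torch

for epoch in range(training_config["epochs"]):
    train_loss = train_one_epoch(model, dataloaders["train"], optimizer, criterion)  # hypothetical helper
    val_loss = validate(model, dataloaders["val"], criterion)                        # hypothetical helper
    if scheduler is None:
        pass                                                       # Constant: nothing to step
    elif isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
        scheduler.step(val_loss)                                   # step on the monitored metric
    else:
        scheduler.step()                                           # epoch-based schedules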
create_criterion
Creates a loss function from training configuration.
def create_criterion(
    config: dict,
    class_weights: torch.Tensor | None = None,
    device: torch.device | None = None,
) -> nn.Module
config
dict
required
Training configuration dictionary with keys:
class_weights: Loss type ("None", "Auto Class Weights", "Focal Loss")
class_weights
torch.Tensor
default:"None"
Optional class weights tensor from compute_class_weights()
device
torch.device
default:"None"
Device to move class_weights to
Returns: Loss function (nn.CrossEntropyLoss or FocalLoss)
Loss Functions:
| Type | Loss Function | Use Case |
|---|---|---|
| None | CrossEntropyLoss() | Balanced datasets |
| Auto Class Weights | CrossEntropyLoss(weight=class_weights) | Imbalanced datasets |
| Focal Loss | FocalLoss(alpha=class_weights) | Highly imbalanced, hard examples |
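A minimal sketch of the branching this table implies (illustrative only, not the module's actual source):
import torch
import torch.nn as nn
from training.optimizers import FocalLoss

def create_criterion_sketch(config: dict, class_weights=None, device=None) -> nn.Module:
    # Illustrative branching over the "class_weights" strategy string.
    if class_weights is not None and device is not None:
        class_weights = class_weights.to(device)
    strategy = config.get("class_weights", "None")
    if strategy == "Auto Class Weights":
        return nn.CrossEntropyLoss(weight=class_weights)
    if strategy == "Focal Loss":
        return FocalLoss(alpha=class_weights)
    return nn.CrossEntropyLoss()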
Example
from training.optimizers import create_criterion
from training.dataset import compute_class_weights
# Standard cross entropy
config = {"class_weights": "None"}
criterion = create_criterion(config)
# Weighted cross entropy for imbalanced data
class_weights = compute_class_weights(train_labels, num_classes=9)
config = {"class_weights": "Auto Class Weights"}
criterion = create_criterion(config, class_weights, device)
# Focal loss for highly imbalanced data
class_weights = compute_class_weights(train_labels, num_classes=9)
config = {"class_weights": "Focal Loss"}
criterion = create_criterion(config, class_weights, device)
Complete Training Setup
import torch
import torch.nn as nn
from training.dataset import create_dataloaders
from training.optimizers import create_optimizer, create_scheduler, create_criterion
from training.engine import TrainingEngine
# Configuration
training_config = {
    "optimizer": "AdamW",
    "learning_rate": 0.0001,
    "l2_decay": True,
    "l2_lambda": 0.01,
    "lr_strategy": "Cosine Annealing",
    "epochs": 100,
    "batch_size": 32,
    "class_weights": "Focal Loss"
}
dataset_config = {
    "dataset_path": "dataset",
    "preprocessing": {"target_size": (224, 224)},
    "augmentation": {"preset": "Moderate"}
}
model_config = {
    "architecture": "ResNet50",
    "num_classes": 9,
    "pretrained": True
}
# Setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = build_model(model_config).to(device)  # build_model comes from the project's model-building module (import not shown here)
# Create dataloaders
dataloaders, class_names, class_weights = create_dataloaders(
    dataset_config,
    training_config
)
# Create optimizer, scheduler, and loss
optimizer = create_optimizer(model, training_config)
scheduler = create_scheduler(
    optimizer,
    training_config,
    steps_per_epoch=len(dataloaders["train"])
)
criterion = create_criterion(
    training_config,
    class_weights,
    device
)
# Create training engine
engine = TrainingEngine(
    model=model,
    train_loader=dataloaders["train"],
    val_loader=dataloaders["val"],
    optimizer=optimizer,
    criterion=criterion,
    device=device,
    scheduler=scheduler,
    early_stopping_patience=10
)
# Train
results = engine.fit(epochs=training_config["epochs"])
print(f"Training completed in {results['duration']}")
print(f"Best validation loss: {results['best_val_loss']:.4f}")
Learning Rate Schedule Visualization
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
from training.optimizers import create_scheduler
model = nn.Linear(10, 10)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# Test different schedulers
schedulers = [
    ("Cosine Annealing", "Cosine Annealing"),
    ("Step Decay", "Step Decay"),
    ("Exponential Decay", "Exponential Decay")
]
plt.figure(figsize=(12, 4))
for idx, (name, strategy) in enumerate(schedulers):
    config = {"lr_strategy": strategy, "epochs": 100}
    scheduler = create_scheduler(optimizer, config, steps_per_epoch=100)
    lrs = []
    for epoch in range(100):
        lrs.append(optimizer.param_groups[0]["lr"])
        if scheduler:
            scheduler.step()
    plt.subplot(1, 3, idx + 1)
    plt.plot(lrs)
    plt.title(name)
    plt.xlabel("Epoch")
    plt.ylabel("Learning Rate")
    plt.grid(True)
    # Reset optimizer
    for param_group in optimizer.param_groups:
        param_group["lr"] = 0.001
plt.tight_layout()
plt.savefig("lr_schedules.png")