
Overview

The optimizers module provides factory functions for creating PyTorch optimizers, learning rate schedulers, and loss functions based on configuration dictionaries. It also includes a custom Focal Loss implementation for handling class imbalance.

Classes

FocalLoss

Focal Loss implementation for imbalanced multi-class classification. Reduces loss contribution from easy examples and focuses on hard examples.
class FocalLoss(nn.Module):
    def __init__(
        self,
        alpha: torch.Tensor | None = None,
        gamma: float = 2.0,
        reduction: str = "mean"
    )
alpha
torch.Tensor
default:"None"
Class weights tensor of shape (num_classes,). Used to handle class imbalance.
gamma
float
default:"2.0"
Focusing parameter. Higher values down-weight easy examples more. Typical range: [0, 5]
reduction
str
default:"mean"
Specifies the reduction to apply: "none", "mean", or "sum".
Formula:
FL(pt) = -alpha * (1 - pt)^gamma * log(pt)

where pt = exp(-CE_loss)
When to use:
  • Highly imbalanced datasets
  • When many examples are easy to classify
  • When you want to focus training on hard examples
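
To make the formula concrete, the forward pass can be written directly from it. The sketch below is an illustration of the math, not necessarily the module's exact implementation; it applies the alpha class weights through cross_entropy's weight argument.

import torch
import torch.nn as nn
import torch.nn.functional as F

class FocalLossSketch(nn.Module):
    """Illustrative focal loss: FL(pt) = -alpha * (1 - pt)^gamma * log(pt)."""

    def __init__(self, alpha=None, gamma=2.0, reduction="mean"):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, logits, targets):
        # Per-sample cross entropy; weight=alpha folds in the class weights
        ce_loss = F.cross_entropy(logits, targets, weight=self.alpha, reduction="none")
        pt = torch.exp(-ce_loss)                  # pt = exp(-CE_loss)
        focal = (1 - pt) ** self.gamma * ce_loss  # down-weight easy examples
        if self.reduction == "mean":
            return focal.mean()
        if self.reduction == "sum":
            return focal.sum()
        return focal                              # "none": per-sample losses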

Example

import torch
import torch.nn as nn
from training.optimizers import FocalLoss
from training.dataset import compute_class_weights

# Compute class weights
class_weights = compute_class_weights(train_labels, num_classes=9)

# Create focal loss
criterion = FocalLoss(
    alpha=class_weights,
    gamma=2.0,
    reduction="mean"
)

# Use in training
outputs = model(inputs)
loss = criterion(outputs, targets)
loss.backward()

Functions

create_optimizer

Creates a PyTorch optimizer from training configuration.
def create_optimizer(model: nn.Module, config: dict) -> torch.optim.Optimizer
model
nn.Module
required
PyTorch model whose parameters will be optimized
config
dict
required
Training configuration dictionary with keys:
  • optimizer: Optimizer name ("Adam", "AdamW", "SGD with Momentum", "RMSprop")
  • learning_rate: Learning rate (default: 0.001)
  • l2_decay: Whether to enable L2 regularization (default: False)
  • l2_lambda: L2 regularization coefficient (default: 0.0001)
Returns: Configured optimizer instance

Supported Optimizers:

| Optimizer         | Description                          | Best For                             |
| ----------------- | ------------------------------------ | ------------------------------------ |
| Adam              | Adaptive learning rate with momentum | General purpose, default choice      |
| AdamW             | Adam with decoupled weight decay     | Better generalization than Adam      |
| SGD with Momentum | Classic SGD with momentum=0.9        | Large batch training, simple models  |
| RMSprop           | Adaptive learning rate               | RNNs, online learning                |
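
As a mental model, the factory presumably dispatches on the optimizer name and passes l2_lambda as weight_decay when l2_decay is enabled. The following is a hedged sketch of that mapping using the defaults listed above, not the module's verbatim code:

import torch
import torch.nn as nn

def create_optimizer_sketch(model: nn.Module, config: dict) -> torch.optim.Optimizer:
    lr = config.get("learning_rate", 0.001)
    # L2 regularization is applied as weight decay only when l2_decay is enabled
    weight_decay = config.get("l2_lambda", 0.0001) if config.get("l2_decay", False) else 0.0
    name = config["optimizer"]

    if name == "Adam":
        return torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    if name == "AdamW":
        return torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
    if name == "SGD with Momentum":
        return torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=weight_decay)
    if name == "RMSprop":
        return torch.optim.RMSprop(model.parameters(), lr=lr, weight_decay=weight_decay)
    raise ValueError(f"Unknown optimizer: {name}")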

Example

from training.optimizers import create_optimizer

# Basic Adam optimizer
config = {
    "optimizer": "Adam",
    "learning_rate": 0.001,
    "l2_decay": False
}
optimizer = create_optimizer(model, config)

# AdamW with weight decay
config = {
    "optimizer": "AdamW",
    "learning_rate": 0.0001,
    "l2_decay": True,
    "l2_lambda": 0.01
}
optimizer = create_optimizer(model, config)

# SGD with momentum and L2 regularization
config = {
    "optimizer": "SGD with Momentum",
    "learning_rate": 0.01,
    "l2_decay": True,
    "l2_lambda": 0.0001
}
optimizer = create_optimizer(model, config)

create_scheduler

Creates a learning rate scheduler from training configuration.
def create_scheduler(
    optimizer: torch.optim.Optimizer,
    config: dict,
    steps_per_epoch: int,
) -> torch.optim.lr_scheduler.LRScheduler | None
optimizer
torch.optim.Optimizer
required
Optimizer instance to schedule
config
dict
required
Training configuration dictionary with keys:
  • lr_strategy: Scheduler name ("Constant", "ReduceLROnPlateau", "Cosine Annealing", "Step Decay", "Exponential Decay")
  • epochs: Total number of training epochs
steps_per_epoch
int
required
Number of training steps per epoch (length of train DataLoader)
Returns: Configured scheduler instance, or None if using a constant learning rate

Supported Schedulers:

| Scheduler         | Description                    | Parameters                           |
| ----------------- | ------------------------------ | ------------------------------------ |
| Constant          | No scheduling                  | None                                 |
| ReduceLROnPlateau | Reduce LR when metric plateaus | factor=0.5, patience=5, min_lr=1e-6  |
| Cosine Annealing  | Cosine decay to minimum        | T_max=epochs, eta_min=1e-6           |
| Step Decay        | Reduce LR at fixed intervals   | step_size=epochs/3, gamma=0.1        |
| Exponential Decay | Exponential decay              | gamma=0.95                           |
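
These strategy names likely map onto the standard torch.optim.lr_scheduler classes with the parameters listed in the table. A hedged sketch of that mapping (the schedulers listed here all step once per epoch, so steps_per_epoch is accepted but unused in this sketch):

import torch

def create_scheduler_sketch(optimizer, config, steps_per_epoch):
    strategy = config.get("lr_strategy", "Constant")

    if strategy == "Constant":
        return None  # constant LR: nothing to schedule
    if strategy == "ReduceLROnPlateau":
        return torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, factor=0.5, patience=5, min_lr=1e-6
        )
    if strategy == "Cosine Annealing":
        return torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, T_max=config["epochs"], eta_min=1e-6
        )
    if strategy == "Step Decay":
        return torch.optim.lr_scheduler.StepLR(
            optimizer, step_size=max(1, config["epochs"] // 3), gamma=0.1
        )
    if strategy == "Exponential Decay":
        return torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.95)
    raise ValueError(f"Unknown lr_strategy: {strategy}")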

Example

from training.optimizers import create_optimizer, create_scheduler

optimizer = create_optimizer(model, training_config)

# Constant learning rate
config = {"lr_strategy": "Constant"}
scheduler = create_scheduler(optimizer, config, steps_per_epoch=100)
# Returns None

# ReduceLROnPlateau - reduces LR when validation loss plateaus
config = {
    "lr_strategy": "ReduceLROnPlateau",
    "epochs": 100
}
scheduler = create_scheduler(optimizer, config, steps_per_epoch=100)
# Call with: scheduler.step(val_loss)

# Cosine Annealing - smooth decay
config = {
    "lr_strategy": "Cosine Annealing",
    "epochs": 100
}
scheduler = create_scheduler(optimizer, config, steps_per_epoch=100)
# Call with: scheduler.step()

# Step Decay - drops LR at fixed intervals
config = {
    "lr_strategy": "Step Decay",
    "epochs": 90
}
scheduler = create_scheduler(optimizer, config, steps_per_epoch=100)
# Reduces LR by 0.1x every 30 epochs
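
As the comments above indicate, ReduceLROnPlateau steps on the monitored validation metric while the other schedulers step once per epoch. A sketch of a training loop that handles both cases (train_one_epoch and evaluate are hypothetical helpers standing in for your own loop):

from torch.optim.lr_scheduler import ReduceLROnPlateau

for epoch in range(config["epochs"]):
    train_one_epoch(model, train_loader, optimizer, criterion)  # hypothetical helper
    val_loss = evaluate(model, val_loader, criterion)           # hypothetical helper

    if scheduler is None:
        continue                      # "Constant": nothing to step
    if isinstance(scheduler, ReduceLROnPlateau):
        scheduler.step(val_loss)      # plateau scheduler needs the monitored metric
    else:
        scheduler.step()              # epoch-based schedulers step unconditionally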

create_criterion

Creates a loss function from training configuration.
def create_criterion(
    config: dict,
    class_weights: torch.Tensor | None = None,
    device: torch.device | None = None,
) -> nn.Module
config
dict
required
Training configuration dictionary with keys:
  • class_weights: Loss type ("None", "Auto Class Weights", "Focal Loss")
class_weights
torch.Tensor
default:"None"
Optional class weights tensor from compute_class_weights()
device
torch.device
default:"None"
Device to move class_weights to
Returns: Loss function (nn.CrossEntropyLoss or FocalLoss)

Loss Functions:

| Type               | Loss Function                          | Use Case                         |
| ------------------ | -------------------------------------- | -------------------------------- |
| None               | CrossEntropyLoss()                     | Balanced datasets                |
| Auto Class Weights | CrossEntropyLoss(weight=class_weights) | Imbalanced datasets              |
| Focal Loss         | FocalLoss(alpha=class_weights)         | Highly imbalanced, hard examples |
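
In other words, the factory likely reduces to a dispatch on the class_weights setting, moving any weights to the target device first. A sketch under that assumption:

import torch
import torch.nn as nn
from training.optimizers import FocalLoss

def create_criterion_sketch(
    config: dict,
    class_weights: torch.Tensor | None = None,
    device: torch.device | None = None,
) -> nn.Module:
    mode = config.get("class_weights", "None")
    if class_weights is not None and device is not None:
        class_weights = class_weights.to(device)

    if mode == "Auto Class Weights":
        return nn.CrossEntropyLoss(weight=class_weights)
    if mode == "Focal Loss":
        return FocalLoss(alpha=class_weights, gamma=2.0, reduction="mean")
    return nn.CrossEntropyLoss()  # "None": plain cross entropy for balanced data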

Example

from training.optimizers import create_criterion
from training.dataset import compute_class_weights

# Standard cross entropy
config = {"class_weights": "None"}
criterion = create_criterion(config)

# Weighted cross entropy for imbalanced data
class_weights = compute_class_weights(train_labels, num_classes=9)
config = {"class_weights": "Auto Class Weights"}
criterion = create_criterion(config, class_weights, device)

# Focal loss for highly imbalanced data
class_weights = compute_class_weights(train_labels, num_classes=9)
config = {"class_weights": "Focal Loss"}
criterion = create_criterion(config, class_weights, device)

Complete Training Setup

import torch
import torch.nn as nn
from training.dataset import create_dataloaders
from training.optimizers import create_optimizer, create_scheduler, create_criterion
from training.engine import TrainingEngine

# Configuration
training_config = {
    "optimizer": "AdamW",
    "learning_rate": 0.0001,
    "l2_decay": True,
    "l2_lambda": 0.01,
    "lr_strategy": "Cosine Annealing",
    "epochs": 100,
    "batch_size": 32,
    "class_weights": "Focal Loss"
}

dataset_config = {
    "dataset_path": "dataset",
    "preprocessing": {"target_size": (224, 224)},
    "augmentation": {"preset": "Moderate"}
}

model_config = {
    "architecture": "ResNet50",
    "num_classes": 9,
    "pretrained": True
}

# Setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = build_model(model_config).to(device)  # build_model comes from the project's model factory module

# Create dataloaders
dataloaders, class_names, class_weights = create_dataloaders(
    dataset_config,
    training_config
)

# Create optimizer, scheduler, and loss
optimizer = create_optimizer(model, training_config)

scheduler = create_scheduler(
    optimizer,
    training_config,
    steps_per_epoch=len(dataloaders["train"])
)

criterion = create_criterion(
    training_config,
    class_weights,
    device
)

# Create training engine
engine = TrainingEngine(
    model=model,
    train_loader=dataloaders["train"],
    val_loader=dataloaders["val"],
    optimizer=optimizer,
    criterion=criterion,
    device=device,
    scheduler=scheduler,
    early_stopping_patience=10
)

# Train
results = engine.fit(epochs=training_config["epochs"])
print(f"Training completed in {results['duration']}")
print(f"Best validation loss: {results['best_val_loss']:.4f}")

Learning Rate Schedule Visualization

import torch
import torch.nn as nn
import matplotlib.pyplot as plt
from training.optimizers import create_optimizer, create_scheduler

model = nn.Linear(10, 10)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Test different schedulers
schedulers = [
    ("Cosine Annealing", "Cosine Annealing"),
    ("Step Decay", "Step Decay"),
    ("Exponential Decay", "Exponential Decay")
]

plt.figure(figsize=(12, 4))

for idx, (name, strategy) in enumerate(schedulers):
    config = {"lr_strategy": strategy, "epochs": 100}
    scheduler = create_scheduler(optimizer, config, steps_per_epoch=100)
    
    lrs = []
    for epoch in range(100):
        lrs.append(optimizer.param_groups[0]["lr"])
        if scheduler:
            scheduler.step()
    
    plt.subplot(1, 3, idx + 1)
    plt.plot(lrs)
    plt.title(name)
    plt.xlabel("Epoch")
    plt.ylabel("Learning Rate")
    plt.grid(True)
    
    # Reset optimizer
    for param_group in optimizer.param_groups:
        param_group["lr"] = 0.001

plt.tight_layout()
plt.savefig("lr_schedules.png")