Self Supervised Learning on CIFAR10
Overview
Initially we train an EfficientNet model on 5% of the CIFAR-10 training images and evaluate it on a held-out 50% split. We then build a jigsaw pretext-task dataset from another 45% of the images, pretrain on it, fine-tune the pretrained model on the same 5%, and evaluate it on the same 50% split. Additional experiments, such as initialising EfficientNet with ImageNet weights and varying hyperparameters, have also been carried out.
Import Libraries
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision.transforms import ToTensor, Resize, Compose
from torchvision.utils import save_image
from PIL import Image
import os
from glob import glob
import cv2
import numpy as np
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from torchvision.datasets import ImageFolder
import time
Preparing DataLoaders
from torchvision.datasets import CIFAR10

transform = Compose([
    # Resize images to 33x33 to make them divisible by 3 for the later jigsaw task.
    Resize((33, 33)),
    ToTensor()
])

# Load CIFAR-10 dataset
dataset = CIFAR10(root='./data', train=True, download=True, transform=transform)

# Split dataset
train_size = int(0.05 * len(dataset))
pretrain_size = int(0.45 * len(dataset))
test_size = len(dataset) - train_size - pretrain_size
train_dataset, pretrain_dataset, test_dataset = random_split(
    dataset, [train_size, pretrain_size, test_size])

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
pretrain_loader = DataLoader(pretrain_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
Files already downloaded and verified
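With the standard 50,000-image CIFAR-10 training set, the 5% / 45% / 50% split corresponds to 2,500 training, 22,500 pretraining, and 25,000 held-out test images. A quick sanity check (not part of the original run) would be:
print(len(train_dataset), len(pretrain_dataset), len(test_dataset))
# expected: 2500 22500 25000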
Initial training on 5% of the data
First, an EfficientNet-B0 model is used without any pretrained weights.
from torchvision import models
model = models.efficientnet_b0(pretrained=False)
num_ftrs = model.classifier[1].in_features
model.classifier[1] = nn.Linear(num_ftrs, 10)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
/usr/lib/python3/dist-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.
warnings.warn(
/usr/lib/python3/dist-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=None`.
warnings.warn(msg)
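As the warnings note, torchvision 0.13+ prefers the `weights` argument over the deprecated `pretrained` flag. The cells in this notebook keep the old argument; an equivalent, warning-free construction (a sketch with hypothetical variable names, not run here) would be:
from torchvision import models

# Non-deprecated equivalents of the deprecated flag:
model_scratch = models.efficientnet_b0(weights=None)  # same as pretrained=False
model_imagenet = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.IMAGENET1K_V1)  # same as pretrained=True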
# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
# train on only 5% of the data
loss_list = []
lr = []

for epoch in range(25):  # loop over the dataset multiple times
    scheduler.step()
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        inputs, targets = data
        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        loss.backward()
        optimizer.step()

        lr.append(scheduler.get_lr()[0])

        running_loss += loss.item()

        if i % 10 == 9:  # print every 10 mini-batches
            print('[%d, %5d] loss: %.3f, lr: %.6f' %
                  (epoch + 1, i + 1, running_loss / 10, scheduler.get_lr()[0]))
            # append avg running loss every 10 mini-batches
            loss_list.append(running_loss / 10)
            running_loss = 0.0
plt.subplot(2, 1, 1)
plt.plot(loss_list, label='Training Loss')
plt.title('Training Loss over Batches')
plt.xlabel('Batch')
plt.ylabel('Loss')
plt.legend()

plt.subplot(2, 1, 2)
plt.plot(lr, label='Learning Rate', color='r')
plt.title('Learning Rate over Batches')
plt.xlabel('Batch')
plt.ylabel('Learning Rate')
plt.legend()

plt.tight_layout()
plt.show()
# save the model state dict
torch.save(model.state_dict(),'./cifar10_5percent_scheduler_pretrained_false.pth')
Using an EfficientNet model pretrained on ImageNet
from torchvision import models
model = models.efficientnet_b0(pretrained=True)
num_ftrs = model.classifier[1].in_features
model.classifier[1] = nn.Linear(num_ftrs, 10)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
/usr/lib/python3/dist-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=EfficientNet_B0_Weights.IMAGENET1K_V1`. You can also use `weights=EfficientNet_B0_Weights.DEFAULT` to get the most up-to-date weights.
warnings.warn(msg)
Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-3dd342df.pth" to /home/deus/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-3dd342df.pth
100%|██████████| 20.5M/20.5M [00:01<00:00, 14.2MB/s]
# train on only 5% of the data
loss_list = []
lr = []

for epoch in range(25):  # loop over the dataset multiple times
    scheduler.step()
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        inputs, targets = data
        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        loss.backward()
        optimizer.step()
        lr.append(scheduler.get_lr()[0])
        running_loss += loss.item()

        if i % 10 == 9:  # print every 10 mini-batches
            print('[%d, %5d] loss: %.3f, lr: %.6f' %
                  (epoch + 1, i + 1, running_loss / 10, scheduler.get_lr()[0]))
            loss_list.append(running_loss / 10)
            running_loss = 0.0
plt.subplot(2, 1, 1)
plt.plot(loss_list, label='Training Loss')
plt.title('Training Loss over Batches')
plt.xlabel('Batch')
plt.ylabel('Loss')
plt.legend()

plt.subplot(2, 1, 2)
plt.plot(lr, label='Learning Rate', color='r')
plt.title('Learning Rate over Batches')
plt.xlabel('Batch')
plt.ylabel('Learning Rate')
plt.legend()

plt.tight_layout()
plt.show()
# save the model state dict
torch.save(model.state_dict(),'./cifar10_5percent_scheduler_pretrained_true.pth')
Using the Adam optimiser (an attempt at hyperparameter tuning)
from torchvision import models
model = models.efficientnet_b0(pretrained=True)
num_ftrs = model.classifier[1].in_features
model.classifier[1] = nn.Linear(num_ftrs, 10)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
import torch.optim as optim
# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)  # Using Adam optimizer
num_epochs = 25
loss_list = []

# Training loop
for epoch in range(num_epochs):
    running_loss = 0.0
    for i, (inputs, targets) in enumerate(train_loader):
        # inputs = reassemble_patches(inputs, grid_size=3)
        inputs, targets = inputs.to(device), targets.to(device)

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 10 == 9:  # print every 10 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 10))
            loss_list.append(running_loss / 10)
            running_loss = 0.0
plt.subplot(2, 1, 1)
plt.plot(loss_list, label='Training Loss')
plt.title('Training Loss over Batches')
plt.xlabel('Batch')
plt.ylabel('Loss')
plt.legend()

plt.subplot(2, 1, 2)
# note: 'lr' still holds values from the earlier SGD run; no scheduler is used with Adam here
plt.plot(lr, label='Learning Rate', color='r')
plt.title('Learning Rate over Batches')
plt.xlabel('Batch')
plt.ylabel('Learning Rate')
plt.legend()

plt.tight_layout()
plt.show()
# save the model state dict
torch.save(model.state_dict(), './cifar10_5percent_adam_pretrained_true.pth')
Image Jigsaw Puzzle Pretraining
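In this pretext task each 33x33 image is cut into a 3x3 grid of 11x11 patches, the patches are shuffled according to one of 100 fixed permutations, and the network is trained to predict which of the 100 permutations was applied. This is a 100-way classification problem that needs no class labels, so the 45% pretraining split can be used without its annotations.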
from itertools import permutations
from torchvision.transforms import functional as F
def extract_patches(image, grid_size=3):
    patch_size = image.size(1) // grid_size
    patches = [F.crop(image, i, j, patch_size, patch_size)
               for i in range(0, image.size(1), patch_size)
               for j in range(0, image.size(2), patch_size)]
    return patches

def apply_permutation(patches, perm):
    return [patches[i] for i in perm]

# Example permutation generation (100 permutations)
num_permutations = 100
all_perms = np.array(list(permutations(range(9))))  # all 9! = 362,880 orderings of the 9 patches
selected_perms = all_perms[np.random.choice(
    len(all_perms), num_permutations, replace=False)]
from torch.utils.data import Dataset
class JigsawPuzzleDataset(Dataset):
    def __init__(self, dataset, permutations):
        self.dataset = dataset
        self.permutations = permutations

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        image, _ = self.dataset[idx]
        perm_idx = np.random.choice(len(self.permutations))
        perm = self.permutations[perm_idx]
        shuffled_patches = apply_permutation(extract_patches(image), perm)
        # Convert list of patches to tensor
        shuffled_image = torch.stack(shuffled_patches)
        return shuffled_image, perm_idx

# Example usage
pretrain_dataset = JigsawPuzzleDataset(pretrain_dataset, selected_perms)
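A quick check of what the dataset returns (not part of the original run): each item is a stack of the nine shuffled patches together with the index of the permutation that produced them.
sample_patches, sample_perm_idx = pretrain_dataset[0]
print(sample_patches.shape)   # expected: torch.Size([9, 3, 11, 11]) for 33x33 inputs and a 3x3 grid
print(sample_perm_idx)        # index into selected_perms, in the range [0, 99]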
import matplotlib.pyplot as plt
import numpy as np
import torchvision.transforms as transforms
def visualize_jigsaw(original_image, permuted_patches, grid_size=3):
    """
    Visualize the original and permuted image side by side.

    Args:
        original_image (Tensor): The original image tensor.
        permuted_patches (Tensor): The permuted patches tensor.
        grid_size (int): The size of the grid to divide the image into.
    """
    # Convert tensors to numpy arrays
    original_image = original_image.permute(1, 2, 0).numpy()

    # Reconstruct permuted image from patches
    patch_size = original_image.shape[0] // grid_size
    permuted_image = permuted_patches.view(
        grid_size, grid_size, 3, patch_size, patch_size)
    permuted_image = permuted_image.permute(0, 3, 1, 4, 2).contiguous()
    permuted_image = permuted_image.view(
        grid_size * patch_size, grid_size * patch_size, 3)

    # Plotting
    fig, axes = plt.subplots(1, 2, figsize=(10, 5))
    axes[0].imshow(original_image)
    axes[0].set_title("Original Image")
    axes[0].axis('off')

    axes[1].imshow(permuted_image)
    axes[1].set_title("Permuted Image")
    axes[1].axis('off')

    plt.show()
# Example usage
# Assuming 'pretrain_dataset' is the JigsawPuzzleDataset instance
original_image, _ = pretrain_dataset.dataset[0]      # Get an original image
permuted_image, perm_idx = pretrain_dataset[0]       # Get a permuted image and its permutation index
print(perm_idx)
visualize_jigsaw(original_image, permuted_image)
13
Using an EfficientNet model pretrained on ImageNet
from torchvision import models
# Load EfficientNet model
model = models.efficientnet_b0(pretrained=True)

# Modify the last layer for permutation prediction
num_ftrs = model.classifier[1].in_features
num_permutations = 100  # Assuming 100 permutations
model.classifier[1] = nn.Linear(num_ftrs, num_permutations)

pretrain_loader = DataLoader(pretrain_dataset, batch_size=32, shuffle=True)

# Move the model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
def reassemble_patches(patches, grid_size=3):
    """
    Reassemble the shuffled patches into a single image tensor.
    Converts the 5D patch tensor [batch, patches, C, H, W] into a 4D image batch.
    """
    batch_size, num_patches, channels, patch_height, patch_width = patches.shape
    patches = patches.view(batch_size, grid_size, grid_size,
                           channels, patch_height, patch_width)
    patches = patches.permute(0, 1, 4, 2, 5, 3).contiguous()
    patches = patches.view(batch_size, grid_size * patch_height,
                           grid_size * patch_width, channels)
    # Rearrange axes to [batch_size, channels, height, width]
    patches = patches.permute(0, 3, 1, 2)
    return patches
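A small shape check for reassemble_patches (a sketch under the 33x33 image / 3x3 grid assumptions used above):
dummy = torch.randn(4, 9, 3, 11, 11)                 # [batch, patches, channels, H, W]
print(reassemble_patches(dummy, grid_size=3).shape)  # expected: torch.Size([4, 3, 33, 33])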
import torch.optim as optim
# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)  # Using Adam optimizer
num_epochs = 10
loss_list = []
lr = []

# Training loop
for epoch in range(num_epochs):
    running_loss = 0.0
    for i, (inputs, targets) in enumerate(pretrain_loader):
        inputs = reassemble_patches(inputs, grid_size=3)

        inputs, targets = inputs.to(device), targets.to(device)

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # note: 'scheduler' is left over from the earlier SGD cell; Adam here runs with a fixed learning rate
        lr.append(scheduler.get_lr()[0])
        running_loss += loss.item()
        if i % 10 == 9:  # print every 10 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 10))
            loss_list.append(running_loss / 10)
            running_loss = 0.0
plt.subplot(2, 1, 1)
plt.plot(loss_list, label='Training Loss')
plt.title('Training Loss over Batches')
plt.xlabel('Batch')
plt.ylabel('Loss')
plt.legend()

plt.subplot(2, 1, 2)
plt.plot(lr, label='Learning Rate', color='r')
plt.title('Learning Rate over Batches')
plt.xlabel('Batch')
plt.ylabel('Learning Rate')
plt.legend()

plt.tight_layout()
plt.show()
from torchvision import models
import torch.nn as nn
import torch.optim as optim
import torch
# Get the number of input features to the last layer
num_ftrs = model.classifier[1].in_features

# Reset the last layer for CIFAR-10 classification (10 classes)
model.classifier[1] = nn.Linear(num_ftrs, 10)

# Move the model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Loss function and optimizer for fine-tuning
criterion = nn.CrossEntropyLoss()
# Using Adam optimizer, LR can be adjusted as needed
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Number of epochs for fine-tuning
num_fine_tune_epochs = 25
loss_list = []

# Fine-tuning training loop
for epoch in range(num_fine_tune_epochs):
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        # Get the inputs and labels
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward and optimize
        loss.backward()
        optimizer.step()

        # Print statistics
        running_loss += loss.item()
        if i % 10 == 9:  # print every 10 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 10))
            loss_list.append(running_loss / 10)
            running_loss = 0.0
plt.subplot(2, 1, 1)
plt.plot(loss_list, label='Training Loss')
plt.title('Training Loss over Batches')
plt.xlabel('Batch')
plt.ylabel('Loss')
plt.legend()

plt.tight_layout()
plt.show()
torch.save(model.state_dict(), './pretrained.pth')
Using an EfficientNet model without pretraining on ImageNet
from torchvision import models
model = models.efficientnet_b0(pretrained=False)

num_ftrs = model.classifier[1].in_features
num_permutations = 100  # Assuming 100 permutations
model.classifier[1] = nn.Linear(num_ftrs, num_permutations)

pretrain_loader = DataLoader(pretrain_dataset, batch_size=32, shuffle=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
import torch.optim as optim
# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)  # Using Adam optimizer
num_epochs = 10
loss_list = []

# Training loop
for epoch in range(num_epochs):
    running_loss = 0.0
    for i, (inputs, targets) in enumerate(pretrain_loader):
        inputs = reassemble_patches(inputs, grid_size=3)

        inputs, targets = inputs.to(device), targets.to(device)

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 10 == 9:  # print every 10 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 10))
            loss_list.append(running_loss / 10)
            running_loss = 0.0
plt.subplot(2, 1, 1)
plt.plot(loss_list, label='Training Loss')
plt.title('Training Loss over Batches')
plt.xlabel('Batch')
plt.ylabel('Loss')
plt.legend()

plt.subplot(2, 1, 2)
plt.plot(lr, label='Learning Rate', color='r')
plt.title('Learning Rate over Batches')
plt.xlabel('Batch')
plt.ylabel('Learning Rate')
plt.legend()

plt.tight_layout()
plt.show()
An interesting observation is that there is hardly any drop in training loss compared to the run with transferred weights, indicating that the network has not yet learned useful image features.
from torchvision import models
import torch.nn as nn
import torch.optim as optim
import torch
# Get the number of input features to the last layer
num_ftrs = model.classifier[1].in_features

# Reset the last layer for CIFAR-10 classification (10 classes)
model.classifier[1] = nn.Linear(num_ftrs, 10)

# Move the model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Loss function and optimizer for fine-tuning
criterion = nn.CrossEntropyLoss()
# Using Adam optimizer, LR can be adjusted as needed
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Number of epochs for fine-tuning
num_fine_tune_epochs = 25
loss_list = []

# Fine-tuning training loop
for epoch in range(num_fine_tune_epochs):
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        # Get the inputs and labels
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward and optimize
        loss.backward()
        optimizer.step()

        # Print statistics
        running_loss += loss.item()
        if i % 10 == 9:  # print every 10 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 10))
            loss_list.append(running_loss / 10)
            running_loss = 0.0
plt.subplot(2, 1, 1)
plt.plot(loss_list, label='Training Loss')
plt.title('Training Loss over Batches')
plt.xlabel('Batch')
plt.ylabel('Loss')
plt.legend()

plt.tight_layout()
plt.show()
torch.save(model.state_dict(), './notpretrained.pth')
import torch
import torch.nn as nn
from torchvision import models
from torch.utils.data import DataLoader
# Define the test_model function
def test_model(model, dataloader, device):
    model.eval()  # Set the model to evaluation mode
    total_correct = 0
    total_samples = 0

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)

            total_samples += labels.size(0)
            total_correct += (predicted == labels).sum().item()

    accuracy = total_correct / total_samples
    return accuracy
# List of model file paths
model_paths = [
    './cifar10_5percent_scheduler_pretrained_false.pth',
    './cifar10_5percent_adam_pretrained_true.pth',
    './cifar10_5percent_scheduler_pretrained_true.pth',
    './notpretrained.pth',
    './pretrained.pth'
]
# Loop through each model
for model_path in model_paths:
    # Load the EfficientNet-B0 model
    model = models.efficientnet_b0(pretrained=False)

    # Modify the classifier for CIFAR-10
    num_ftrs = model.classifier[1].in_features
    model.classifier[1] = nn.Linear(num_ftrs, 10)

    # Load the model weights
    model.load_state_dict(torch.load(model_path))

    # Move the model to the device (e.g., GPU if available)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)

    # Test the model
    accuracy = test_model(model, test_loader, device)

    # Print or store the accuracy for this model
    print(f'Model: {model_path}, Accuracy: {accuracy*100:.2f}%')
Model: ./cifar10_5percent_scheduler_pretrained_false.pth, Accuracy: 39.82%
Model: ./cifar10_5percent_adam_pretrained_true.pth, Accuracy: 50.45%
Model: ./cifar10_5percent_scheduler_pretrained_true.pth, Accuracy: 63.28%
Model: ./notpretrained.pth, Accuracy: 17.29%
Model: ./pretrained.pth, Accuracy: 46.72%
Observations:
* When training on only 5% of the dataset, the best results are achieved with the ImageNet-pretrained model (63%), versus 40% without any prior pretraining.
* After pretraining on the jigsaw task and then fine-tuning, the best accuracy is 48%; this model uses weights transferred from an EfficientNet pretrained on ImageNet. The model without any transferred weights reaches 34%.
* Both approaches score well above the 10% baseline of random guessing.
Proposed solutions to increase accuracy: using the gap trick, we pad the disordered input images with zeros up to the size of the original images. The gap trick discourages the jigsaw puzzle solvers described above from learning low-level boundary statistics and encourages the learning of high-level visuospatial representations of objects; one possible implementation is sketched below.
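A minimal sketch of one interpretation of the gap trick, in which each 11x11 patch is centred inside a larger zero-filled cell so that neighbouring patches no longer share an edge. The function name `reassemble_with_gaps` and the `cell_size` value are hypothetical, not from the notebook:
def reassemble_with_gaps(patches, grid_size=3, cell_size=15):
    # patches: [batch, grid_size*grid_size, channels, patch_h, patch_w]
    batch_size, num_patches, channels, ph, pw = patches.shape
    out = patches.new_zeros(batch_size, channels,
                            grid_size * cell_size, grid_size * cell_size)
    offset = (cell_size - ph) // 2  # centre each patch, leaving a zero border around it
    for idx in range(num_patches):
        row, col = divmod(idx, grid_size)
        top, left = row * cell_size + offset, col * cell_size + offset
        out[:, :, top:top + ph, left:left + pw] = patches[:, idx]
    return out
In the pretraining loops above, `inputs = reassemble_patches(inputs, grid_size=3)` would then become `inputs = reassemble_with_gaps(inputs)`, giving 45x45 inputs, which EfficientNet accepts since its final pooling layer is adaptive.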
Data augmentation can also help the model generalise better and improve the self-supervised learning; for example, the supervised split could use the augmented transform sketched below.
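A possible augmented training transform (a sketch; the specific augmentations and parameter values are illustrative, and `train_transform` is a hypothetical name):
train_transform = Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    Resize((33, 33)),
    ToTensor(),
])
Note that applying this only to the 5% supervised split would require a second CIFAR10 instance or a wrapper dataset, since subsets produced by random_split share the transform of the underlying dataset.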