import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
Lab 11 - Neural Networks (+unsupervised learning) challenge
In this lab we will explore neural networks. Your goal will be to learn a good 3D representation of the images in FashionMNIST.
We will define a good representation as one in which each class corresponds to a 3D gaussian, and the 3D gaussians of each class are meaningfully separated in space.
We will use a classification task as a proxy to learn this good representation.
Loading data
# Every image is converted to a tensor when it is read from the dataset.
transform = transforms.ToTensor()

# Build the train split first, then the test split; both live under ./data
# and are downloaded on first use.
train_dataset, test_dataset = (
    datasets.FashionMNIST(
        root="./data",
        train=is_train,
        download=True,
        transform=transform,
    )
    for is_train in (True, False)
)

# Only the training loader shuffles; the test loader keeps dataset order.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

print(f"Train samples: {len(train_dataset)}")
print(f"Test samples: {len(test_dataset)}")
print(f"Image shape: {train_dataset[0][0].shape}")
Train samples: 60000
Test samples: 10000
Image shape: torch.Size([1, 28, 28])
Dummy model training
class SimpleCNN(torch.nn.Module):
    """Small CNN classifier whose penultimate activation is a 3-D embedding.

    The network expects inputs of shape ``(batch, 1, 28, 28)``: the
    convolution keeps the 28x28 spatial size (kernel 3, padding 1) and the
    2x2 max-pool halves it to 14x14, which is what the first linear layer's
    ``16 * 14 * 14`` input size relies on.
    """

    def __init__(self) -> None:
        super().__init__()
        # Convolutional feature extractor: (B, 1, 28, 28) -> (B, 16, 14, 14).
        self.features = torch.nn.Sequential(
            torch.nn.Conv2d(1, 16, kernel_size=3, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(2),
        )
        # Flatten and squeeze down to the 3-dimensional embedding.
        self.hidden = torch.nn.Sequential(
            torch.nn.Flatten(),
            torch.nn.Linear(16 * 14 * 14, 3),
            torch.nn.ReLU(),
            torch.nn.Linear(3, 3),  # hidden layer with 3 neurons
            torch.nn.ReLU(),
        )
        # Classification head mapping the embedding to 10 class logits.
        self.logits_layer = torch.nn.Linear(3, 10)

    # Exported explicitly so the method is always compiled into the
    # TorchScript module, even if ``forward`` stops calling it one day.
    @torch.jit.export
    def embedding(self, x: torch.Tensor) -> torch.Tensor:
        """Return the 3-dimensional embedding of a batch of images.

        Args:
            x: Image batch of shape ``(batch, 1, 28, 28)``.

        Returns:
            Tensor of shape ``(batch, 3)``. Because the last layer of
            ``self.hidden`` is a ReLU, every component is non-negative.
        """
        x = self.features(x)
        x = self.hidden(x)
        return x

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the 10 class logits for a batch of images.

        Args:
            x: Image batch of shape ``(batch, 1, 28, 28)``.

        Returns:
            Tensor of shape ``(batch, 10)`` with unnormalized class scores.
        """
        x = self.embedding(x)
        logits = self.logits_layer(x)  # logits computed in architecture
        return logits
# Device used for training and evaluation. If you have a GPU available you
# should consider using "cuda", but the lab should be doable on CPU as well.
device = "cpu"
model = SimpleCNN().to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
epochs = 1

for epoch in range(epochs):
    # ---- training pass over the full training set ----
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # ---- evaluation pass over the test set (no gradients needed) ----
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            preds = torch.argmax(model(images), dim=1)
            correct += torch.eq(preds, labels).sum().item()
            total += labels.size(0)

    # Mean per-batch training loss and test accuracy in percent.
    avg_loss = running_loss / len(train_loader)
    acc = 100 * correct / total
    print(f"Epoch {epoch+1}/{epochs} - loss: {avg_loss:.4f} - test acc: {acc:.2f}%")
# Recorded notebook output: Epoch 1/1 - loss: 1.7586 - test acc: 29.49%
Saving trained model
IMPORTANT: your neural network architecture must have an “embedding” method with exactly this name, and its output should have dimension 3.
# Group identifier; it becomes the prefix of the saved model's filename.
group_name = "demo"  # replace with your actual group name
# Must be an f-string: without the `f` prefix the braces are not interpolated
# and the file would literally be named "{group_name}_nn_model.pt".
model_path = f"{group_name}_nn_model.pt"
model.eval()
# Compile the model to TorchScript on CPU so it can be saved to model_path.
scripted_model = torch.jit.script(model.cpu())
scripted_model.save(model_path)
How your model will be scored
from sklearn.mixture import GaussianMixture
from sklearn.metrics import adjusted_rand_score

# Reload the saved TorchScript model and put it in evaluation mode.
loaded_model = torch.jit.load(model_path, map_location=device)
loaded_model.eval()

# Per-batch embeddings and labels, concatenated after the loop.
embeddings_loaded_batches = []
labels_loaded_batches = []
with torch.no_grad():
    # NOTE: for the real evaluation, test_loader will be replaced by a
    # custom dataloader that will not be shared.
    for images, labels in test_loader:
        x = images.to(device)
        # MAKE SURE YOUR MODEL HAS THIS METHOD AS SHOWN IN THE ARCHITECTURE
        if not hasattr(loaded_model, "embedding"):
            raise NotImplementedError("Loaded model does not have an 'embedding' method. Cannot extract embeddings.")
        # MAKE SURE THE EMBEDDING WORKS GIVEN AN INPUT AS THE ONE FROM THIS ROUTINE
        emb = loaded_model.embedding(x)
        embeddings_loaded_batches.append(emb.cpu())
        labels_loaded_batches.append(labels.cpu())

# Stack the batches along the sample axis and hand them to scikit-learn as arrays.
Z_loaded = torch.cat(embeddings_loaded_batches, dim=0).numpy()
y_loaded = torch.cat(labels_loaded_batches, dim=0).numpy()

# CHECK EMBEDDING DIMENSION, IT MUST BE 3
if Z_loaded.shape[1] != 3:
    raise ValueError(f"Expected embedding dimension of 3, but got {Z_loaded.shape[1]}")

# Fit a 10-component, full-covariance Gaussian mixture on the embeddings
# (k-means initialisation, fixed seed) and read off the cluster assignments.
gmm = GaussianMixture(
    n_components=10,
    covariance_type="full",
    init_params="kmeans",
    random_state=42,
)
gmm_labels = gmm.fit_predict(Z_loaded)

# Score: agreement between the true labels and the GMM clusters.
ari = adjusted_rand_score(y_loaded, gmm_labels)
print(f"Adjusted Rand Index (ARI): {ari:.4f}")
Adjusted Rand Index (ARI): 0.2550
Visualizing the model (not relevant for evaluation)
import plotly.express as px

# Use the dataset's own class names when it exposes them; otherwise fall
# back to the strings "0" .. "9".
class_names = getattr(test_dataset, "classes", [str(i) for i in range(10)])
label_names = [class_names[label] for label in y_loaded]

# One point per test sample, positioned by its three embedding components
# (columns 0, 1, 2 map to the x, y, z axes) and coloured by class name.
fig = px.scatter_3d(
    **{axis: Z_loaded[:, column] for column, axis in enumerate("xyz")},
    color=label_names,
    opacity=0.7,
    labels={"x": "emb_1", "y": "emb_2", "z": "emb_3", "color": "class"},
    title="3D embedding point cloud (test set)",
)
fig.update_traces(marker={"size": 2})
fig.show()
Unable to display output for mime type(s): application/vnd.plotly.v1+json
Allowed methods to use
You must train a neural network model. You may use any architecture you like, as long as the final “.pt” file does not exceed 5 MB. Moreover, as emphasized earlier, your architecture must have an “embedding” method, and this method must output a 3D feature vector given an input with the same dimensions as a FashionMNIST image.
Submission timeline and grading
You will be graded on effort and completion. You should submit your architecture (AND YOUR CODE) on two occasions:
- 24 hours after the lab session from April 20th.
- 24 hours after the lab session from April 27th.
We will publish a leaderboard twice as usual, and the top 2 scoring methods on the last leaderboard will gain some extra points.
You may work in groups of up to 4 as usual.