Hello, I’m using wandb integrated with PyTorch.
My experiments are working fine and I can see all the metrics logged on my dashboard.
But somehow the loading (progress) bar from training is not showing. Only the values of the metrics that I chose appear, and there is not even an indication of which epoch they belong to.
Is there something I’m doing wrong?
Here is my code:
import wandb
# Open a new wandb run for this script. The run is filed under the given
# project, and the hyperparameters / run metadata are attached as its config.
wandb.init(
    project="test_ResNet_pytorch",
    config=dict(
        learning_rate=0.001,
        architecture="ResNet18",
        dataset="Dolphins_test_val",
        epochs=10,
    ),
)
import numpy as np
import torch
from torch.utils.data import TensorDataset, DataLoader
from torchvision import transforms
# Per-channel normalization statistics for the 4-channel inputs (example values).
_CHANNEL_MEAN = [0.34421682, 0.3341824, 0.32139614, 0.32376203]
_CHANNEL_STD = [0.11445788, 0.09811808, 0.08478498, 0.08529754]
# Transform that standardizes each of the 4 channels with the statistics above.
normalize = transforms.Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD)
def normalize_data(tensor):
    """Apply the module-level ``normalize`` transform to a batch of images.

    Args:
        tensor: Tensor expected to have shape [N, C, H, W] with C == 4.

    Returns:
        The result of calling ``normalize`` on ``tensor``.

    Raises:
        ValueError: If ``tensor`` is not 4-dimensional or its second
            dimension is not 4.
    """
    # Reject anything that is not a 4-channel [N, C, H, W] batch up front.
    has_expected_shape = tensor.dim() == 4 and tensor.size(1) == 4
    if not has_expected_shape:
        raise ValueError("O tensor deve ter a forma [N, C, H, W] com C = 4")
    return normalize(tensor)
# Load the .npy files into NumPy arrays (one array per split and class).
train_positive = np.load('/home/flora_sauer/Documents/hd_part2/nps_gray_4channels/transposed/train_positive.npy')
train_negative = np.load('/home/flora_sauer/Documents/hd_part2/nps_gray_4channels/transposed/train_negative.npy')
test_positive = np.load('/home/flora_sauer/Documents/hd_part2/nps_gray_4channels/transposed/test_positive.npy')
test_negative = np.load('/home/flora_sauer/Documents/hd_part2/nps_gray_4channels/transposed/test_negative.npy')

# Convert each NumPy array to a float tensor and divide by 255
# (intended to bring the pixel values into the [0, 1] range).
train_positive_tensor = torch.from_numpy(train_positive).float().div(255.0)
train_negative_tensor = torch.from_numpy(train_negative).float().div(255.0)
test_positive_tensor = torch.from_numpy(test_positive).float().div(255.0)
test_negative_tensor = torch.from_numpy(test_negative).float().div(255.0)

# Apply the per-channel normalization (also validates the [N, 4, H, W] shape).
train_positive_tensor = normalize_data(train_positive_tensor)
train_negative_tensor = normalize_data(train_negative_tensor)
test_positive_tensor = normalize_data(test_positive_tensor)
test_negative_tensor = normalize_data(test_negative_tensor)

# Build the labels: 1 for every positive sample, 0 for every negative sample.
train_positive_labels = torch.ones(len(train_positive_tensor), dtype=torch.long)
train_negative_labels = torch.zeros(len(train_negative_tensor), dtype=torch.long)
test_positive_labels = torch.ones(len(test_positive_tensor), dtype=torch.long)
test_negative_labels = torch.zeros(len(test_negative_tensor), dtype=torch.long)

# Build the TensorDatasets: positives first, then negatives, for both the
# images and their labels so the two stay aligned.
train_images = torch.cat([train_positive_tensor, train_negative_tensor])
train_targets = torch.cat([train_positive_labels, train_negative_labels])
train_dataset = TensorDataset(train_images, train_targets)

test_images = torch.cat([test_positive_tensor, test_negative_tensor])
test_targets = torch.cat([test_positive_labels, test_negative_labels])
test_dataset = TensorDataset(test_images, test_targets)

# Build the DataLoaders (only the training loader shuffles).
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Example iteration over the DataLoader: print the shape of every batch.
for images, labels in train_loader:
    print(images.shape, labels.shape)
# Collect the datasets in a dictionary keyed by split name.
image_datasets = {
    'train': train_dataset,
    'test': test_dataset,
}

# One shuffling, 4-worker DataLoader per split, built from image_datasets.
dataloaders = {
    split: DataLoader(split_dataset, batch_size=32, shuffle=True, num_workers=4)
    for split, split_dataset in image_datasets.items()
}

# Number of samples in each split.
dataset_sizes = {split: len(split_dataset) for split, split_dataset in image_datasets.items()}
print(dataset_sizes)

# Class names are set by hand; the list index is the label value.
class_names = ['negative', 'positive']
print(class_names)
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models
import os
# Load the pre-trained ResNet-18 model
model = models.resnet18(pretrained = True)

# Snapshot the pre-trained 3-channel weights of the first convolution BEFORE
# the layer is replaced. Once model.conv1 points at the new layer the original
# weights are no longer reachable, and copying "from model.conv1" would only
# copy the new layer's random initialisation onto itself.
pretrained_conv1_weight = model.conv1.weight.detach().clone()

# Modify the first convolutional layer to accept 4 channels instead of 3,
# keeping every other hyperparameter of the original layer.
in_channels = 4
out_channels = model.conv1.out_channels
kernel_size = model.conv1.kernel_size
stride = model.conv1.stride
padding = model.conv1.padding

# Create a new convolutional layer with 4 input channels
model.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, bias=False)

# Initialize the new convolutional layer from the pre-trained weights
with torch.no_grad():
    # Channels 0-2: reuse the pre-trained 3-channel filters unchanged
    model.conv1.weight[:, :3, :, :] = pretrained_conv1_weight
    # Channel 3: initialise with the pre-trained filters of the 1st channel
    model.conv1.weight[:, 3:, :, :] = pretrained_conv1_weight[:, :1, :, :]

# Modify the final classification layer to have one output per class
# (2 here) instead of the original 1000
model.fc = nn.Linear(model.fc.in_features, len(class_names))
# Freeze every parameter except those of the final classification layer:
# only parameters whose name contains "fc" (fully connected) stay trainable.
for name, param in model.named_parameters():
    param.requires_grad = "fc" in name

# Loss function and optimizer. All parameters are handed to the optimizer,
# but only the ones left with requires_grad=True above receive gradients.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Run on the first GPU when CUDA is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
model = model.to(device)
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score
# Training loop: every epoch runs one pass over the 'train' split (with
# weight updates) followed by one pass over the 'test' split (evaluation only).
num_epochs = 10
for epoch in range(num_epochs):
    # wandb does not draw a progress bar; print the epoch so the console
    # output shows which epoch the following metrics belong to.
    print(f'Epoch {epoch + 1}/{num_epochs}')

    for phase in ['train', 'test']:
        if phase == 'train':
            model.train()
        else:
            model.eval()

        # Accumulators for the loss, the number of correct predictions, and
        # the labels / positive-class probabilities needed for the AUC.
        running_loss = 0.0
        running_corrects = 0
        all_labels = []
        all_probs = []

        for inputs, labels in dataloaders[phase]:
            inputs = inputs.to(device)  # Images
            labels = labels.to(device)  # Class labels

            optimizer.zero_grad()  # Clear the gradients from the previous iteration

            # Gradients are only tracked during the training phase.
            with torch.set_grad_enabled(phase == 'train'):
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)
                if phase == 'train':
                    loss.backward()
                    optimizer.step()

            # loss is a per-batch mean, so weight it by the batch size.
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)

            # Store the true labels and the predicted probability of class 1
            # for the AUC calculation (assumes binary classification).
            all_labels.extend(labels.cpu().numpy())
            all_probs.extend(torch.nn.functional.softmax(outputs, dim=1)[:, 1].detach().cpu().numpy())

        epoch_loss = running_loss / dataset_sizes[phase]
        # Convert to a plain Python float so printing and logging never
        # receive a (possibly GPU-resident) 0-dim tensor.
        epoch_acc = (running_corrects.double() / dataset_sizes[phase]).item()

        # Calculate AUC
        epoch_auc = roc_auc_score(all_labels, all_probs)

        print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f} AUC: {epoch_auc:.4f}')

        # Log metrics to wandb with a separate key per phase. The 1-based
        # epoch number is logged as its own metric so the dashboard shows
        # which epoch each value belongs to; train and test use the same step.
        wandb.log(
            {
                "epoch": epoch + 1,
                f"{phase}_acc": epoch_acc,
                f"{phase}_loss": epoch_loss,
                f"{phase}_auc": epoch_auc,
            },
            step=epoch,
        )

        # Plot the confusion matrix for the 'test' phase
        if phase == 'test':
            # Convert probabilities to binary predictions (threshold = 0.5)
            binary_preds = [1 if prob >= 0.5 else 0 for prob in all_probs]

            # Compute confusion matrix
            cm = confusion_matrix(all_labels, binary_preds)
            disp = ConfusionMatrixDisplay(confusion_matrix=cm)
            disp.plot(cmap=plt.cm.Blues)

            # Display the confusion matrix; only the final epoch's figure is
            # written to disk (saved before show, while it is still current).
            plt.title(f'Confusion Matrix Test Data for Epoch {epoch+1}')
            if epoch + 1 == num_epochs:
                plt.savefig('/home/flora_sauer/Documents/images/transf_4channels/confusion_matrix_test.png')
            plt.show()
            # Release the figure so open figures do not pile up across epochs.
            plt.close(disp.figure_)

print("Training complete!")

# Save the model weights
torch.save(model.state_dict(), '/home/flora_sauer/Documents/models/transf_4channels/ResNet18_classification_model.pth')

# Mark the wandb run as finished
wandb.finish()