--- /dev/null
+import torch
+
# Select the compute device: the GPU when CUDA is usable, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")

    # Optional: print the GPU model name for confirmation.
    gpu_name = torch.cuda.get_device_name(0)
    print("=" * 50)
    print(f"✅ GPU Detected and Available! Using device: {device}")
    print(f" GPU Model: {gpu_name}")
    print("=" * 50)
else:
    device = torch.device("cpu")
    print("=" * 50)
    print("⚠️ WARNING: CUDA not available. Falling back to CPU.")
    print(" (If you have a GPU, ensure you installed the correct PyTorch version for CUDA.)")
    print("=" * 50)
\ No newline at end of file
--- /dev/null
+import pandas as pd
+import os
+import cv2 # OpenCV for image handling
+
+# ----------------------------------------------
+# Task 1: Get data
+# - load data from files
+# - validate the existence of all images loaded in folder
+# - creates numpy array to store data attributes
+# ----------------------------------------------
+
# Define your file paths
csv_path = 'data/data/train.csv'
image_folder_path = './data/data/'  # <-- UPDATE THIS PATH!

# Load the metadata
try:
    df = pd.read_csv(csv_path)
except FileNotFoundError:
    # SystemExit with a message writes it to stderr and exits with status 1,
    # so callers/shells can tell the load failed. The bare exit() helper is
    # injected by the `site` module and would have exited with status 0.
    raise SystemExit("Error: train.csv not found.") from None
else:
    print("Metadata loaded successfully.")
+
+
def check_image_paths(df, image_folder_path):
    """Drop metadata rows whose image file is absent from the image folder.

    Args:
        df: DataFrame with an ``img_name`` column holding image file names.
        image_folder_path: Directory expected to contain those files.

    Returns:
        ``df`` itself when every listed image is present; otherwise a
        filtered DataFrame without the rows whose image is missing.

    Raises:
        FileNotFoundError: If ``image_folder_path`` does not exist
            (propagated from ``os.listdir``).
    """
    print("\n--- Running Image Path Check ---")

    # Names are compared exactly as they appear in the CSV (no case folding
    # or other normalization) against the direct entries of the folder.
    available_files = set(os.listdir(image_folder_path))
    required_names = set(df['img_name'])
    missing_files = required_names - available_files

    if not missing_files:
        print("✅ All required images were found in the directory.")
        return df

    # Sort so the reported examples are stable between runs; key=str keeps
    # the sort safe if the column holds non-string values such as NaN.
    examples = sorted(missing_files, key=str)[:5]
    print(f"🚨 WARNING: {len(missing_files)} images are missing! Examples: {examples}")

    # Keep only the rows that have an image on disk.
    df = df[~df['img_name'].isin(missing_files)]
    print(f"Cleaned DataFrame size: {len(df)}")
    return df
+
# Run the image existence check and continue with the DataFrame it returns.
df = check_image_paths(df=df, image_folder_path=image_folder_path)
+
+
def load_image_data(df, image_folder_path,
                    feature_columns=('ozone', 'NO2', 'AOT', 'elevation')):
    """Load every image listed in ``df`` together with its metadata values.

    All images are held in memory at once, so memory use grows with the
    number of rows.

    Args:
        df: DataFrame with an ``img_name`` column plus the feature columns.
        image_folder_path: Directory that each ``img_name`` is joined onto.
        feature_columns: Columns copied into each record's ``'metadata'``
            dict. Defaults to the four columns that were previously
            hard-coded.

    Returns:
        A list of dicts, one per successfully loaded row, of the form
        ``{'image': <value returned by cv2.imread>, 'metadata': {column: value}}``.
        Rows whose image cannot be read, or whose processing raises, are
        reported on stdout and skipped.
    """
    processed_data = []

    print("\n--- Loading Images and Features (This may take time) ---")

    for index, row in df.iterrows():
        img_name = row['img_name']

        # Path of the image inside the image folder (not made absolute).
        full_path = os.path.join(image_folder_path, img_name)

        try:
            image = cv2.imread(full_path)

            # imread signals an unreadable file by returning None.
            if image is None:
                print(f"Skipping row {index}: Could not load image at {full_path}")
                continue

            features = {column: row[column] for column in feature_columns}

            processed_data.append({
                'image': image,
                'metadata': features,
            })

        except Exception as e:
            # Deliberate best-effort: one bad row must not abort the load.
            print(f"An error occurred processing row {index}: {e}")

    print("✅ Data loading complete.")
    return processed_data
+
# Load the images and metadata for the rows of df.
combined_dataset = load_image_data(df=df, image_folder_path=image_folder_path)
+
--- /dev/null
+# ----------------------------------------------
+# Cell 1: Setup and Imports
+# ----------------------------------------------
+
+# 1. Import Core Libraries
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.utils.data import DataLoader
+from torchvision import datasets, transforms
+import matplotlib.pyplot as plt
+import numpy as np
+
# 2. Device Configuration
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"✅ Successfully initialized. Using device: {device}")

# Optional: Check GPU details (Good for debugging)
if device.type == 'cuda':
    print(f"GPU Name: {torch.cuda.get_device_name(0)}")
+
+# ----------------------------------------------
+# Cell 2: Data Loading and Transformations
+# ----------------------------------------------
+
# Define the preprocessing steps
transform = transforms.Compose([
    transforms.ToTensor(),                 # Image -> float tensor scaled to [0, 1]
    transforms.Normalize((0.5,), (0.5,)),  # (x - 0.5) / 0.5: rescales [0, 1] to [-1, 1]
])

# Download (if not already present) and load the training split
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)

# Create the DataLoader (handles batching and shuffling)
BATCH_SIZE = 64
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)

# Same for the test split; order is kept fixed (no shuffling) for evaluation
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

print("✅ Data loaded and prepared successfully!")
+
+# ----------------------------------------------
+# Cell 3: Model Definition and GPU Transfer
+# ----------------------------------------------
+
+# Define the CNN Model Architecture
class SimpleCNN(nn.Module):
    """Two-stage convolutional classifier sized for 28x28 inputs.

    Each stage is Conv2d(3x3, padding 1) -> ReLU -> MaxPool2d(2), so the
    spatial size is halved twice (28 -> 14 -> 7) before the linear head.

    Args:
        in_channels: Number of channels in the input images (default 1).
        num_classes: Number of output logits (default 10).
    """

    def __init__(self, in_channels=1, num_classes=10):
        super().__init__()
        # First stage: in_channels -> 16 feature maps
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels, 16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),  # Halves the spatial dimensions
        )
        # Second stage: 16 -> 32 feature maps
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        # Linear head; 32 * 7 * 7 is the flattened size for 28x28 inputs
        self.fc = nn.Linear(32 * 7 * 7, num_classes)

    def forward(self, x):
        """Return logits of shape (batch, num_classes) for an image batch."""
        x = self.conv1(x)
        x = self.conv2(x)
        # flatten, unlike view, also works when the activations are not
        # contiguous (e.g. channels_last inputs).
        x = torch.flatten(x, start_dim=1)
        return self.fc(x)
+
# Initialize the model
model = SimpleCNN()

# Move the model's parameters to the selected device (the GPU when available)
model.to(device)

# Report the actual device: on a CPU-only machine no GPU transfer happens.
print(f"✅ Model defined and weights moved to device: {device}")
+
+
+# ----------------------------------------------
+# Cell 4: The Training Loop
+# ----------------------------------------------
+
# Setup hyperparameters
NUM_EPOCHS = 10
LEARNING_RATE = 0.001

# Loss function and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Per-epoch history for plotting
loss_history = []  # mean batch loss of each epoch
acc_history = []   # training accuracy of each epoch, in percent

print("Starting Training...")

for epoch in range(NUM_EPOCHS):
    # Set model to training mode
    model.train()
    total_loss = 0.0
    train_correct = 0
    train_seen = 0

    for images, labels in train_loader:
        # Inputs and labels must live on the same device as the model.
        images = images.to(device)
        labels = labels.to(device)

        # 1. Zero gradients left over from the previous step
        optimizer.zero_grad()

        # 2. Forward pass
        outputs = model(images)

        # 3. Calculate loss
        loss = criterion(outputs, labels)

        # 4. Backward pass (computes gradients)
        loss.backward()

        # 5. Optimize (updates the weights)
        optimizer.step()

        total_loss += loss.item()

        # Running accuracy, measured on the batches as they are trained on
        # (so the weights change between batches within an epoch).
        train_correct += (outputs.detach().argmax(dim=1) == labels).sum().item()
        train_seen += labels.size(0)

    # Average the per-batch losses for the epoch
    avg_loss = total_loss / len(train_loader)
    loss_history.append(avg_loss)
    acc_history.append(100 * train_correct / train_seen)
    print(f"Epoch {epoch+1}/{NUM_EPOCHS} | Loss: {avg_loss:.4f}")

print("Training Complete!")
+
+
+# ----------------------------------------------
+# Cell 5: Testing and Visualization
+# ----------------------------------------------
+
# Set model to evaluation mode (affects layers such as dropout / batch norm)
model.eval()
correct = 0
total = 0

with torch.no_grad():  # No gradient tracking is needed for evaluation
    for images, labels in test_loader:
        # Inputs and labels must live on the same device as the model.
        images = images.to(device)
        labels = labels.to(device)

        # The index of the highest logit is the predicted class.
        predicted = model(images).argmax(dim=1)

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Report 0.0 instead of dividing by zero when the loader yielded no samples.
accuracy = 100 * correct / total if total else 0.0
print(f"\nFinal Test Accuracy: {accuracy:.2f}%")
+
+
# Visualization (Highly recommended in a Jupyter environment)
plt.figure(figsize=(12, 5))

# Plot 1: Loss Curve
plt.subplot(1, 2, 1)
plt.plot(loss_history, marker='o')
plt.title("Training Loss Over Epochs")
plt.xlabel("Epoch")
plt.ylabel("Loss")

# Plot 2: Accuracy. Use the recorded values when acc_history has any;
# otherwise fall back to the flat placeholder line.
plt.subplot(1, 2, 2)
if acc_history:
    plt.plot(acc_history, marker='o', label="Accuracy")
else:
    plt.plot([0] * len(loss_history), label="Dummy Acc.")
plt.title("Model Performance")
plt.xlabel("Epoch")
plt.ylabel("Accuracy (%)")
plt.legend()  # label= is only rendered once a legend is drawn

plt.tight_layout()
plt.show()
+
# Optional: persist the weights as they stand now (the final state of the
# model; no best-checkpoint selection is performed).
final_weights = model.state_dict()
torch.save(final_weights, 'mnist_cnn_model.pth')
print("\nModel weights saved to 'mnist_cnn_model.pth'")
--- /dev/null
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.utils.data import Dataset, DataLoader
+import numpy as np
+
+
+# ----------------------------------- BUILD MODEL ---------------------------------------------
+# 1. Define the Model Class
class SimpleDNN(nn.Module):
    """Fully connected network with one hidden layer: Linear -> ReLU -> Linear.

    Args:
        input_size: Number of input features.
        hidden_size: Width of the hidden layer.
        output_size: Number of output values (logits).
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.layer1 = nn.Linear(input_size, hidden_size)   # input -> hidden
        self.relu = nn.ReLU()                               # non-linearity
        self.layer2 = nn.Linear(hidden_size, output_size)  # hidden -> output

    def forward(self, x):
        """Run ``x`` through both linear layers with a ReLU in between."""
        hidden = self.relu(self.layer1(x))
        return self.layer2(hidden)
+
# Network dimensions: 784 inputs corresponds to a flattened 28x28 image
# (like MNIST).
INPUT_SIZE = 784
HIDDEN_SIZE = 128
OUTPUT_SIZE = 10  # 10 classes

# Training hyperparameters
LEARNING_RATE = 0.001
NUM_EPOCHS = 10

# Build the model.
model = SimpleDNN(INPUT_SIZE, HIDDEN_SIZE, OUTPUT_SIZE)

# ----------------------------------- GPU INTEGRATION ---------------------------------------------
# Use the GPU when CUDA is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Move all model parameters to the chosen device.
model.to(device)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
+
+
+# ----------------------------------- LOAD DATA ---------------------------------------------
+# 1. DEFINE THE CUSTOM DATASET
class CustomDataset(Dataset):
    """In-memory dataset pairing feature rows with integer labels.

    Args:
        features: Array-like of inputs; copied into a float32 tensor.
        labels: Array-like of integer targets; copied into an int64 tensor.

    Raises:
        ValueError: If ``features`` and ``labels`` differ in length.
    """

    def __init__(self, features, labels):
        self.features = torch.tensor(features, dtype=torch.float32)
        self.labels = torch.tensor(labels, dtype=torch.long)

        # Fail fast: a mismatch would otherwise only surface later as an
        # IndexError (too few labels) or be silently ignored (too many).
        if len(self.features) != len(self.labels):
            raise ValueError(
                f"features and labels must have the same length, got "
                f"{len(self.features)} and {len(self.labels)}"
            )

    def __len__(self):
        # Total number of samples
        return len(self.features)

    def __getitem__(self, idx):
        # One sample as a dict, so consumers can index a batch by
        # 'features' / 'labels'.
        return {
            'features': self.features[idx],
            'labels': self.labels[idx],
        }
+
# 2. PLACEHOLDER DATA (replace with your actual loading code)
# 100 random samples: float32 features in [0, 1) and int64 labels in
# [0, OUTPUT_SIZE).
DUMMY_FEATURES = np.random.rand(100, INPUT_SIZE).astype(np.float32)
DUMMY_LABELS = np.random.randint(0, OUTPUT_SIZE, size=100).astype(np.int64)

# 3. WRAP THE DATA IN A DATASET AND A SHUFFLING, BATCHING LOADER
train_dataset = CustomDataset(features=DUMMY_FEATURES, labels=DUMMY_LABELS)

BATCH_SIZE = 64  # Choose a desired batch size
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
+
+
+# ----------------------------------- TRAINING ---------------------------------------------
# --- The Training Loop ---
for epoch in range(NUM_EPOCHS):
    model.train()
    running_loss = 0.0

    for data in train_loader:
        # 1. Move the batch to the same device as the model
        inputs = data['features'].to(device)
        labels = data['labels'].to(device)

        # 2. Clear the gradients accumulated by the previous step
        optimizer.zero_grad()

        # 3. Forward pass
        outputs = model(inputs)

        # 4. Calculate loss
        loss = criterion(outputs, labels)

        # 5. Backward pass (calculates gradients)
        loss.backward()

        # 6. Optimizer step (updates weights)
        optimizer.step()

        running_loss += loss.item()

    # Report the mean batch loss of the epoch rather than only the last
    # batch's loss; max(..., 1) keeps an empty loader from dividing by zero.
    epoch_loss = running_loss / max(len(train_loader), 1)
    print(f"Epoch {epoch+1}/{NUM_EPOCHS}, Loss: {epoch_loss:.4f}")