Primary competition visual

Lacuna Solar Survey Challenge

Helping Madagascar
$5 000 USD
Completed (12 months ago)
Computer Vision
Prediction
729 joined
247 active
Start
Feb 14, 25
Close
Mar 23, 25
Reveal
Mar 24, 25
LB 0.98 Approach + Code
Notebooks · 16 Mar 2025, 20:15 · 13

👉 This code will take approximately 12+ hours on a GPU.

Approach

I built a pipeline using EfficientNetV2 as the backbone, incorporating metadata-based feature engineering and an attention mechanism. This combination enhances the model’s ability to distinguish between different installation environments and accurately quantify solar technology adoption.

  • Metadata Processing:One-hot encoding for categorical variables (img_origin, placement). Normalization and scaling for numerical features.
  • Augmentation Techniques:Geometric transformations: random flips, rotations, and cropping. Photometric transformations: color jitter, CLAHE, and Gaussian blur.

  • tf_efficientnetv2_b3 from the timm library for feature extraction.
  • Metadata Fusion:Metadata processed via fully connected layers. Multihead attention mechanism to enhance embeddings.
  • Regression Head: Predicts the count of solar panels and boilers per image.

Training Strategy

  • Loss Function: Huber Loss to mitigate outlier effects.
  • Optimizer: AdamW with weight decay.
  • Scheduler: Cosine Annealing with warm restarts.
  • Training Methodology:5-fold cross-validation. 52 epochs per fold. Mixed precision training for efficiency.

Inference and Evaluation

  • Test Time Augmentation (TTA): Multiple predictions per image to improve robustness.
  • Mean Absolute Error (MAE): Used as the primary evaluation metric.
# -*- coding: utf-8 -*-
import os
import timm
import torch
import torch.nn as nn
import torch.optim as optim
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import cv2
from sklearn.model_selection import KFold
import numpy as np
from tqdm import tqdm
from torch.amp import autocast, GradScaler  # Fixed import
from sklearn.metrics import mean_absolute_error

# Fixed Albumentations version warning
os.environ['NO_ALBUMENTATIONS_UPDATE'] = '1'

# Enhanced Dataset with Metadata
class SolarPanelDataset(Dataset):
    """Image + metadata dataset for the solar-panel counting task.

    Each item yields the transformed image, a 5-dim metadata vector
    (one flag for img_origin == "D" plus a 4-way one-hot of the
    placement category) and, in training mode, the [boiler, panel]
    count target as a float tensor.
    """

    def __init__(self, dataframe, transform=None, to_train=True):
        # dataframe must provide columns: path, img_origin, placement,
        # and (when to_train) boil_nbr / pan_nbr.
        self.dataframe = dataframe
        self.transform = transform
        self.to_train = to_train
        # Unknown placement values fall back to index 3 ("S-unknown").
        self.placement_map = {"roof": 0, "openspace": 1, "r_openspace": 2, "S-unknown": 3}

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        row = self.dataframe.iloc[idx]
        image = cv2.imread(row["path"])
        if image is None:
            # cv2.imread silently returns None on a missing/corrupt file;
            # fail loudly here instead of crashing opaquely in cvtColor.
            raise FileNotFoundError(f"Unable to read image: {row['path']}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR

        # Metadata layout: [origin flag, placement one-hot (4 slots)].
        metadata = torch.zeros(5)
        metadata[0] = 1.0 if row["img_origin"] == "D" else 0.0
        placement = self.placement_map.get(row["placement"], 3)
        metadata[1 + placement] = 1.0  # One-hot encoding

        if self.transform:
            image = self.transform(image=image)['image']

        if self.to_train:
            target = torch.tensor([row["boil_nbr"], row["pan_nbr"]], dtype=torch.float32)
            return image, metadata, target
        return image, metadata

# Model with Metadata
class EfficientNetV2Meta(nn.Module):
    """EfficientNetV2 backbone fused with processed metadata via
    multi-head self-attention, followed by a Softplus regression head
    that emits two non-negative count predictions per image."""

    def __init__(self):
        super().__init__()
        # Image feature extractor; num_classes=0 strips the classifier head.
        self.backbone = timm.create_model(
            "tf_efficientnetv2_b3", pretrained=True, num_classes=0
        )  # you can even try Larger backbone
        # Project the 5-dim metadata vector into a 64-dim embedding.
        self.meta_processor = nn.Sequential(
            nn.Linear(5, 128),
            nn.LayerNorm(128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 64),
        )
        self.attention = nn.MultiheadAttention(embed_dim=64, num_heads=4)
        self.regressor = nn.Sequential(
            nn.Linear(self.backbone.num_features + 64, 512),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(512, 2),
            nn.Softplus(),  # Better for count predictions
        )

    def forward(self, image, metadata):
        visual = self.backbone(image)
        # Insert a length-1 sequence axis so the metadata embedding can
        # pass through self-attention in (seq=1, batch, embed) layout.
        embedded = self.meta_processor(metadata.unsqueeze(0))
        attended, _ = self.attention(embedded, embedded, embedded)
        fused = torch.cat([visual, attended.squeeze(0)], dim=1)
        return self.regressor(fused)

# Advanced Augmentation
# Training pipeline: geometric + photometric augmentation, then
# normalization with the pretrained backbone's ImageNet statistics.
train_transform = A.Compose([
    # NOTE(review): older albumentations accepts positional (height, width);
    # newer releases expect size=(512, 512) — confirm the installed version.
    A.RandomResizedCrop(512, 512, scale=(0.7, 1.0)),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.RandomRotate90(p=0.5),
    # Photometric augmentation: blur, local-contrast boost, color jitter.
    A.GaussianBlur(blur_limit=(3, 7), p=0.3),
    A.CLAHE(clip_limit=4.0, p=0.5),
    A.HueSaturationValue(p=0.3),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2()
])

# Deterministic pipeline for validation/test: resize + normalize only.
test_transform = A.Compose([
    A.Resize(512, 512),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2()
])

# Training Configuration
def train(fold=0, epochs=20, batch_size=16):
    """Train one cross-validation fold and return its best validation MAE.

    Reads Train.csv, aggregates per-image annotation counts, trains
    EfficientNetV2Meta with Huber loss + AdamW + cosine warm restarts
    under mixed precision, and checkpoints the weights with the lowest
    validation MAE to best_model_fold{fold}.pth.
    """
    train_df = pd.read_csv("Train.csv")
    # Several rows can describe the same image; sum the counts and keep
    # the first metadata value per ID.
    train_df = train_df.groupby("ID").agg({
        "boil_nbr": "sum",
        "pan_nbr": "sum",
        "img_origin": "first",
        "placement": "first"
    }).reset_index()
    train_df["path"] = "images/" + train_df["ID"] + ".jpg"

    # Fixed seed so every fold sees the same split across runs.
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    splits = list(kf.split(train_df))
    train_idx, val_idx = splits[fold]

    train_ds = SolarPanelDataset(train_df.iloc[train_idx], transform=train_transform)
    val_ds = SolarPanelDataset(train_df.iloc[val_idx], transform=test_transform)

    train_loader = DataLoader(train_ds, batch_size=batch_size, 
                             shuffle=True, num_workers=4, pin_memory=True)
    # No backward pass in validation, so a double-size batch fits in memory.
    val_loader = DataLoader(val_ds, batch_size=batch_size*2, 
                           shuffle=False, num_workers=4, pin_memory=True)

    model = EfficientNetV2Meta().cuda()
    criterion = nn.HuberLoss(delta=1.0)  # robust to count outliers
    optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)
    scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2)
    scaler = GradScaler()  # loss scaling for mixed-precision stability

    best_mae = float('inf')
    for epoch in range(epochs):
        # ---- Training loop ----
        model.train()
        train_loss = 0.0
        pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs} [Train]")
        for images, meta, targets in pbar:
            images = images.cuda(non_blocking=True)
            meta = meta.cuda(non_blocking=True)
            targets = targets.cuda(non_blocking=True)
            
            optimizer.zero_grad()
            with autocast(device_type='cuda'):
                outputs = model(images, meta)
                loss = criterion(outputs, targets)
            # AMP recipe: scale the loss, step, then update the scale factor.
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            
            train_loss += loss.item()
            pbar.set_postfix(loss=loss.item())
        
        # ---- Validation loop ----
        model.eval()
        val_loss = 0.0
        preds, truths = [], []
        with torch.no_grad():
            for images, meta, targets in tqdm(val_loader, desc=f"Epoch {epoch+1}/{epochs} [Val]"):
                images = images.cuda(non_blocking=True)
                meta = meta.cuda(non_blocking=True)
                targets = targets.cuda(non_blocking=True)
                
                with autocast(device_type='cuda'):
                    outputs = model(images, meta)
                    loss = criterion(outputs, targets)
                
                val_loss += loss.item()
                preds.append(outputs.cpu().numpy())
                truths.append(targets.cpu().numpy())
        
        # Average losses over batches and compute epoch-level validation MAE.
        train_loss /= len(train_loader)
        val_loss /= len(val_loader)
        preds = np.concatenate(preds)
        truths = np.concatenate(truths)
        mae = mean_absolute_error(truths, preds)
        
        print(f"Epoch {epoch+1}/{epochs}")
        print(f"Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | Val MAE: {mae:.4f}")
        
        # Checkpoint on MAE (the competition metric), not on loss.
        if mae < best_mae:
            best_mae = mae
            torch.save(model.state_dict(), f"best_model_fold{fold}.pth")
        
        scheduler.step()  # advance the cosine warm-restart schedule once per epoch
    
    return best_mae

# Inference: fold-ensemble averaging (note: no per-image test-time
# augmentation is actually performed, despite the post's "TTA" wording).
def predict(test_df, model_paths, batch_size=32):
    """Average predictions of the fold checkpoints over the test set.

    Args:
        test_df: DataFrame with an "ID" column. Not modified — a copy
            is used internally.
        model_paths: checkpoint file paths, one per trained fold.
        batch_size: inference batch size.

    Returns:
        (len(test_df), 2) array of [boiler, panel] count predictions
        averaged across the fold models.
    """
    # Work on a copy so the caller's DataFrame is not mutated in place
    # (the original added a "path" column to the caller's object).
    test_df = test_df.copy()
    test_df["path"] = "images/" + test_df["ID"] + ".jpg"
    test_ds = SolarPanelDataset(test_df, transform=test_transform, to_train=False)
    test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False, num_workers=4)

    predictions = np.zeros((len(test_df), 2))
    for path in model_paths:
        model = EfficientNetV2Meta().cuda()
        model.load_state_dict(torch.load(path, weights_only=True))  # Safer loading
        model.eval()

        fold_preds = []
        with torch.no_grad():
            for images, meta in tqdm(test_loader, desc="Inference"):
                images = images.cuda()
                meta = meta.cuda()
                with autocast(device_type='cuda'):
                    outputs = model(images, meta)
                fold_preds.append(outputs.cpu().numpy())

        predictions += np.concatenate(fold_preds)

    # Mean over the model ensemble.
    return predictions / len(model_paths)

# Main Execution
if __name__ == "__main__":
    # Train all folds, collecting one best-MAE checkpoint path per fold.
    folds = 5
    model_paths = []
    for fold in range(folds):
        print(f"Training fold {fold+1}/{folds}")
        best_mae = train(fold=fold, epochs=52, batch_size=32)
        model_paths.append(f"best_model_fold{fold}.pth")
    
    # Ensemble the fold checkpoints over the test set.
    test_df = pd.read_csv("Test.csv")
    predictions = predict(test_df, model_paths, batch_size=64)
    
    # Long-format submission: two rows per image ID, boiler count first,
    # then panel count (row-major flatten matches the [boil, pan]
    # column order of the model output).
    submission = pd.DataFrame({
        "ID": np.repeat(test_df["ID"].values, 2),
        "Target": predictions.flatten()
    })
    # cumcount == 0 marks the first of each repeated ID pair -> "_boil";
    # the second row of the pair gets "_pan". Assumes IDs in Test.csv
    # are unique — TODO confirm.
    submission["ID"] += np.where(
        submission.groupby("ID").cumcount() == 0,
        "_boil",
        "_pan"
    )
    submission.to_csv("submission_original.csv", index=False)
    
    # Second variant with counts rounded to integers.
    int_submission = submission.copy()
    int_submission["Target"] = np.round(int_submission["Target"]).astype(int)
    int_submission.to_csv("submission_integer.csv", index=False)
    
    print("Submissions saved with shapes:", submission.shape, int_submission.shape)

I hope this will help. For further improvements, try vision transformers, larger backbones, and more epochs.

👉 This code will take approximately 12+ hours on a GPU.

Upvote post and kaggle notebook if you find the code helpful.

Kaggle Link:https://www.kaggle.com/code/johndoe2011/efficientnetv2-lacuna-solar-panel

Discussion 13 answers
User avatar
CodeJoe

Wow, I am perplexed. Thank you so much @zulo40. I have upvoted it.

16 Mar 2025, 21:47
Upvotes 2

If I maintain my rank, I will surely share the code after the competition.

18 Mar 2025, 14:43
Upvotes 1

wow! This is amazing!!

18 Mar 2025, 15:04
Upvotes 2

I like your code so much that I made a video for it : )

https://www.youtube.com/watch?v=CyYt5ufgkBA

Thank you again for sharing it!

19 Mar 2025, 02:04
Upvotes 3

Thanks buddy, can you tell me how you added that voice? It's good and totally aligned.

LOL it's a secret :P

Thanks @everyone for making this the most upvoted post. This motivates me to share other approaches in the future too.

20 Mar 2025, 06:21
Upvotes 2

I wish i can upvote 10 times :)

you can use the code below to compute cv score. Very nice cv score!

MAE: 1.21
MAE (pan): 2.24
MAE (boil): 0.18

def predict_train(batch_size=32):
    """
    Makes out-of-fold predictions on the training data: each fold's model
    predicts only on its own held-out validation split.

    Args:
        batch_size: Batch size for inference

    Returns:
        DataFrame with the original data plus pred_boil_nbr / pred_pan_nbr
        columns, and the per-target / overall validation MAE printed.
    """
    train_df = pd.read_csv("/raid/ml/solar/Train.csv")
    train_df = train_df.groupby("ID").agg({
        "boil_nbr": "sum",
        "pan_nbr": "sum",
        "img_origin": "first",
        "placement": "first"
    }).reset_index()
    train_df["path"] = "/raid/ml/solar/images/" + train_df["ID"] + ".jpg"
    
    # Create a copy for storing predictions. Initialize as floats so the
    # later .loc assignment of float predictions doesn't hit an int column
    # (dtype-upcast warning in modern pandas).
    result_df = train_df.copy()
    result_df["pred_boil_nbr"] = 0.0
    result_df["pred_pan_nbr"] = 0.0
    
    # Splits must be recreated with the SAME seed used in train() so each
    # checkpoint is evaluated on the fold it never saw during training.
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    splits = list(kf.split(train_df))
    
    for fold in range(5):
        _, val_idx = splits[fold]
        fold_df = train_df.iloc[val_idx].copy()
        
        # Create dataset and loader for this fold
        val_ds = SolarPanelDataset(fold_df, transform=test_transform, to_train=False)
        val_loader = DataLoader(val_ds, batch_size=batch_size, shuffle=False, num_workers=4)
        
        # Load the corresponding checkpoint; weights_only=True for safer
        # deserialization, consistent with predict() in the main script.
        model = EfficientNetV2Meta().cuda()
        model.load_state_dict(torch.load(f"best_model_fold{fold}.pth", weights_only=True))
        model.eval()
        
        # Make predictions
        fold_preds = []
        with torch.no_grad():
            for images, meta in tqdm(val_loader, desc=f"Predicting Fold {fold}"):
                images = images.cuda()
                meta = meta.cuda()
                with autocast(device_type='cuda'):
                    outputs = model(images, meta)
                fold_preds.append(outputs.cpu().numpy())
        
        # Positional val_idx doubles as a label index because of the
        # reset_index() above, so .loc assignment lines up row-for-row.
        fold_preds = np.concatenate(fold_preds)
        result_df.loc[val_idx, "pred_boil_nbr"] = fold_preds[:, 0]
        result_df.loc[val_idx, "pred_pan_nbr"] = fold_preds[:, 1]
    
    # Calculate MAE for validation
    mae_boil = mean_absolute_error(result_df["boil_nbr"], result_df["pred_boil_nbr"])
    mae_pan = mean_absolute_error(result_df["pan_nbr"], result_df["pred_pan_nbr"])
    overall_mae = mean_absolute_error(
        np.column_stack([result_df["boil_nbr"], result_df["pan_nbr"]]),
        np.column_stack([result_df["pred_boil_nbr"], result_df["pred_pan_nbr"]])
    )
    
    print(f"Validation MAE - Boil: {mae_boil:.4f}, Pan: {mae_pan:.4f}, Overall: {overall_mae:.4f}")
    
    return result_df
# Out-of-fold CV score: the reported "MAE" is the average of the two
# per-target MAEs, not the pooled MAE predict_train() already prints.
cv = predict_train()
mae_pan = mean_absolute_error(cv.pan_nbr, cv.pred_pan_nbr)
mae_boil = mean_absolute_error(cv.boil_nbr, cv.pred_boil_nbr)
mae = (mae_pan + mae_boil) / 2
print(f"MAE: {mae:.2f}")
print(f"MAE (pan): {mae_pan:.2f}")
print(f"MAE (boil): {mae_boil:.2f}")

20 Mar 2025, 13:21
Upvotes 3
User avatar
Agastya

@snow whats the lb score for this cv

I didn't submit. I think it is around 0.96 lb. please note that this is using all 5 folds. the original code trained 3 out of 5 folds. but the difference is small < 0.02 mae

User avatar
CodeJoe

The original code didn't set random state so it can vary. i had a way off score when I tried to check @zulo40's notebook.

I noticed that GPU utilization is not high due to reading the image on every __getitem__() call. Instead, I read all the images only once and cached them, and I got a 3x speedup in training. I also resized the images to 1280x720, so caching cost 13 GB of system memory. You can tune this for your memory. Just be mindful that later we have a 512x512 random crop, so don't resize too much.

class SolarPanelDataset(Dataset):
    """Dataset variant that pre-loads and caches all images in RAM
    (resized to 1280x720) so the training loop avoids per-item disk
    reads. Falls back to reading from disk for any image missing from
    the cache (cache_images=False, or a load that failed at init)."""

    def __init__(self, dataframe, transform=None, to_train=True, cache_images=True):
        self.dataframe = dataframe
        self.transform = transform
        self.to_train = to_train
        # Unknown placement values fall back to index 3 ("S-unknown").
        self.placement_map = {"roof": 0, "openspace": 1, "r_openspace": 2, "S-unknown": 3}

        # Cache images during initialization, keyed by ID.
        self.images = {}
        if cache_images:
            print("Caching images...")
            for idx, row in tqdm(dataframe.iterrows(), total=len(dataframe)):
                try:
                    image = cv2.imread(row["path"])
                    if image is not None:
                        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                        # 1280x720 keeps RAM use bounded while staying well
                        # above the later 512x512 random-crop size.
                        self.images[row['ID']] = cv2.resize(image, (1280, 720))
                    else:
                        print(f"Warning: Unable to read image at {row['path']}")
                except Exception as e:
                    print(f"Error loading image at index {idx}: {e}")

            print(f"Successfully cached {len(self.images)} out of {len(dataframe)} images")

    def __len__(self):
        return len(self.dataframe)

    def _load_image(self, row):
        # Disk-read fallback for uncached images; mirrors the cached
        # pipeline (BGR->RGB) minus the resize.
        image = cv2.imread(row["path"])
        if image is None:
            raise FileNotFoundError(f"Unable to read image: {row['path']}")
        return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    def __getitem__(self, idx):
        row = self.dataframe.iloc[idx]

        # Bug fix: the original indexed self.images[row['ID']] directly,
        # raising KeyError when cache_images=False or when the image had
        # failed to load during caching.
        image = self.images.get(row['ID'])
        if image is None:
            image = self._load_image(row)

        # Metadata layout: [origin flag, placement one-hot (4 slots)].
        metadata = torch.zeros(5)
        metadata[0] = 1.0 if row["img_origin"] == "D" else 0.0
        placement = self.placement_map.get(row["placement"], 3)
        metadata[1 + placement] = 1.0  # One-hot encoding

        if self.transform:
            image = self.transform(image=image)['image']

        if self.to_train:
            target = torch.tensor([row["boil_nbr"], row["pan_nbr"]], dtype=torch.float32)
            return image, metadata, target
        return image, metadata
22 Mar 2025, 01:33
Upvotes 1