👉 This code will take approximately 12+ hours on a GPU.
I built a pipeline using EfficientNetV2 as the backbone, incorporating metadata-based feature engineering and an attention mechanism. This combination enhances the model’s ability to distinguish between different installation environments and accurately quantify solar technology adoption.
# -*- coding: utf-8 -*-
import os

# NO_ALBUMENTATIONS_UPDATE suppresses the albumentations version-check, which
# runs at import time — so it must be set BEFORE albumentations is imported.
# (The original set it after the import, where it had no effect.)
os.environ['NO_ALBUMENTATIONS_UPDATE'] = '1'

import albumentations as A
import cv2
import numpy as np
import pandas as pd
import timm
import torch
import torch.nn as nn
import torch.optim as optim
from albumentations.pytorch import ToTensorV2
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import KFold
from torch.amp import autocast, GradScaler
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
# Enhanced Dataset with Metadata
class SolarPanelDataset(Dataset):
    """Image + metadata dataset for the solar-panel counting task.

    Each item yields the (optionally transformed) RGB image and a 5-dim
    metadata vector: slot 0 flags rows whose ``img_origin`` equals "D",
    slots 1-4 one-hot encode the placement category. In training mode a
    float32 target ``[boil_nbr, pan_nbr]`` is returned as well.
    """

    def __init__(self, dataframe, transform=None, to_train=True):
        self.dataframe = dataframe
        self.transform = transform
        self.to_train = to_train
        # Unknown placement values fall back to the "S-unknown" slot (3).
        self.placement_map = {"roof": 0, "openspace": 1, "r_openspace": 2, "S-unknown": 3}

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        row = self.dataframe.iloc[idx]
        image = cv2.imread(row["path"])
        if image is None:
            # cv2.imread returns None (instead of raising) for missing or
            # unreadable files; fail loudly with the offending path rather
            # than crashing with an opaque error inside cvtColor.
            raise FileNotFoundError(f"Unable to read image: {row['path']}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR; pipeline expects RGB
        # Metadata: [img_origin == "D" flag, 4-way one-hot placement]
        metadata = torch.zeros(5)
        metadata[0] = 1.0 if row["img_origin"] == "D" else 0.0
        placement = self.placement_map.get(row["placement"], 3)
        metadata[1 + placement] = 1.0
        if self.transform:
            image = self.transform(image=image)['image']
        if self.to_train:
            target = torch.tensor([row["boil_nbr"], row["pan_nbr"]], dtype=torch.float32)
            return image, metadata, target
        return image, metadata
# Model with Metadata
class EfficientNetV2Meta(nn.Module):
    """EfficientNetV2-B3 backbone fused with a processed metadata vector.

    The backbone's pooled features are concatenated with a 64-dim metadata
    embedding and regressed to two non-negative counts.

    NOTE(review): the metadata forms a single attention token, so
    self-attention over it is effectively just its value projection —
    presumably kept for extensibility; confirm it earns its parameters.
    """

    def __init__(self):
        super().__init__()
        # num_classes=0 strips the classifier head so the backbone emits
        # pooled feature vectors. A larger backbone is a drop-in swap.
        self.backbone = timm.create_model("tf_efficientnetv2_b3", pretrained=True, num_classes=0)
        self.meta_processor = nn.Sequential(
            nn.Linear(5, 128),
            nn.LayerNorm(128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 64),
        )
        self.attention = nn.MultiheadAttention(embed_dim=64, num_heads=4)
        self.regressor = nn.Sequential(
            nn.Linear(self.backbone.num_features + 64, 512),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(512, 2),
            nn.Softplus(),  # keeps both count predictions non-negative
        )

    def forward(self, image, metadata):
        visual = self.backbone(image)
        # (batch, 5) -> (seq=1, batch, 64): MultiheadAttention defaults to
        # sequence-first layout, so the metadata acts as a one-token sequence.
        token = self.meta_processor(metadata.unsqueeze(0))
        attended, _ = self.attention(token, token, token)
        fused = torch.cat([visual, attended.squeeze(0)], dim=1)
        return self.regressor(fused)
# Advanced Augmentation
# NOTE: recent albumentations versions take the crop size as a single
# `size=(h, w)` tuple; the old positional (height, width) form of
# RandomResizedCrop is deprecated/removed, so use the keyword form.
train_transform = A.Compose([
    A.RandomResizedCrop(size=(512, 512), scale=(0.7, 1.0)),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.RandomRotate90(p=0.5),
    A.GaussianBlur(blur_limit=(3, 7), p=0.3),
    A.CLAHE(clip_limit=4.0, p=0.5),
    A.HueSaturationValue(p=0.3),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),  # ImageNet stats
    ToTensorV2(),
])
# Deterministic eval-time pipeline: resize + normalize only, no augmentation.
_eval_steps = [
    A.Resize(512, 512),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2(),
]
test_transform = A.Compose(_eval_steps)
# Training Configuration
def _load_train_frame():
    """Read Train.csv and collapse duplicate rows per image ID.

    Counts are summed across duplicate annotations; categorical metadata
    keeps the first value seen. Adds the on-disk image path column.
    """
    df = pd.read_csv("Train.csv")
    df = df.groupby("ID").agg({
        "boil_nbr": "sum",
        "pan_nbr": "sum",
        "img_origin": "first",
        "placement": "first",
    }).reset_index()
    df["path"] = "images/" + df["ID"] + ".jpg"
    return df


def _train_one_epoch(model, loader, criterion, optimizer, scaler, desc):
    """Run one mixed-precision training pass; return the mean batch loss."""
    model.train()
    total = 0.0
    pbar = tqdm(loader, desc=desc)
    for images, meta, targets in pbar:
        images = images.cuda(non_blocking=True)
        meta = meta.cuda(non_blocking=True)
        targets = targets.cuda(non_blocking=True)
        optimizer.zero_grad()
        with autocast(device_type='cuda'):
            outputs = model(images, meta)
            loss = criterion(outputs, targets)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        total += loss.item()
        pbar.set_postfix(loss=loss.item())
    return total / len(loader)


def _validate(model, loader, criterion, desc):
    """Evaluate the model; return (mean loss, predictions, targets) arrays."""
    model.eval()
    total = 0.0
    preds, truths = [], []
    with torch.no_grad():
        for images, meta, targets in tqdm(loader, desc=desc):
            images = images.cuda(non_blocking=True)
            meta = meta.cuda(non_blocking=True)
            targets = targets.cuda(non_blocking=True)
            with autocast(device_type='cuda'):
                outputs = model(images, meta)
                loss = criterion(outputs, targets)
            total += loss.item()
            preds.append(outputs.cpu().numpy())
            truths.append(targets.cpu().numpy())
    return total / len(loader), np.concatenate(preds), np.concatenate(truths)


def train(fold=0, epochs=20, batch_size=16):
    """Train one cross-validation fold and return its best validation MAE.

    Uses a fixed-seed 5-fold split so every fold call partitions the data
    consistently. The best (lowest-MAE) weights are written to
    ``best_model_fold{fold}.pth``.
    """
    train_df = _load_train_frame()
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    train_idx, val_idx = list(kf.split(train_df))[fold]
    train_ds = SolarPanelDataset(train_df.iloc[train_idx], transform=train_transform)
    val_ds = SolarPanelDataset(train_df.iloc[val_idx], transform=test_transform)
    train_loader = DataLoader(train_ds, batch_size=batch_size,
                              shuffle=True, num_workers=4, pin_memory=True)
    val_loader = DataLoader(val_ds, batch_size=batch_size * 2,
                            shuffle=False, num_workers=4, pin_memory=True)

    model = EfficientNetV2Meta().cuda()
    criterion = nn.HuberLoss(delta=1.0)  # robust to outlier counts vs. plain MSE
    optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)
    scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2)
    scaler = GradScaler()

    best_mae = float('inf')
    for epoch in range(epochs):
        train_loss = _train_one_epoch(model, train_loader, criterion, optimizer,
                                      scaler, f"Epoch {epoch+1}/{epochs} [Train]")
        val_loss, preds, truths = _validate(model, val_loader, criterion,
                                            f"Epoch {epoch+1}/{epochs} [Val]")
        mae = mean_absolute_error(truths, preds)
        print(f"Epoch {epoch+1}/{epochs}")
        print(f"Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | Val MAE: {mae:.4f}")
        # Checkpoint on validation MAE (the competition metric), not loss.
        if mae < best_mae:
            best_mae = mae
            torch.save(model.state_dict(), f"best_model_fold{fold}.pth")
        scheduler.step()
    return best_mae
# Ensemble inference: averages predictions across the supplied checkpoints.
def predict(test_df, model_paths, batch_size=32):
    """Average per-fold model predictions over the test set.

    Returns an (n_samples, 2) array of [boiler, panel] count predictions.
    NOTE(review): despite the original "TTA" naming, no test-time
    augmentation is applied — each model scores every image exactly once.
    """
    test_df["path"] = "images/" + test_df["ID"] + ".jpg"
    dataset = SolarPanelDataset(test_df, transform=test_transform, to_train=False)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=4)

    totals = np.zeros((len(test_df), 2))
    for checkpoint in model_paths:
        net = EfficientNetV2Meta().cuda()
        # weights_only=True refuses arbitrary pickled objects in the file.
        net.load_state_dict(torch.load(checkpoint, weights_only=True))
        net.eval()
        fold_preds = []
        with torch.no_grad():
            for images, meta in tqdm(loader, desc="Inference"):
                images = images.cuda()
                meta = meta.cuda()
                with autocast(device_type='cuda'):
                    out = net(images, meta)
                fold_preds.append(out.cpu().numpy())
        totals += np.concatenate(fold_preds)
    return totals / len(model_paths)
# Main Execution
if __name__ == "__main__":
    # Train every fold and collect the saved checkpoint paths.
    n_folds = 5
    model_paths = []
    for fold in range(n_folds):
        print(f"Training fold {fold+1}/{n_folds}")
        best_mae = train(fold=fold, epochs=52, batch_size=32)
        model_paths.append(f"best_model_fold{fold}.pth")

    # Ensemble the fold checkpoints over the test set.
    test_df = pd.read_csv("Test.csv")
    predictions = predict(test_df, model_paths, batch_size=64)

    # Long-format submission: two rows per image — boiler count first,
    # panel count second — distinguished by an ID suffix.
    submission = pd.DataFrame({
        "ID": np.repeat(test_df["ID"].values, 2),
        "Target": predictions.flatten(),
    })
    submission["ID"] += np.where(
        submission.groupby("ID").cumcount() == 0,
        "_boil",
        "_pan",
    )
    submission.to_csv("submission_original.csv", index=False)

    # Also save an integer-rounded variant, since targets are counts.
    int_submission = submission.copy()
    int_submission["Target"] = np.round(int_submission["Target"]).astype(int)
    int_submission.to_csv("submission_integer.csv", index=False)
    print("Submissions saved with shapes:", submission.shape, int_submission.shape)
I hope this helps. For further improvements, try vision transformers, larger backbones, and more epochs.
👉 This code will take approximately 12+ hours on a GPU.
Upvote post and kaggle notebook if you find the code helpful.
Kaggle Link:https://www.kaggle.com/code/johndoe2011/efficientnetv2-lacuna-solar-panel
Wow, I am perplexed. Thank you so much @zulo40. I have upvoted it.
If I maintain my rank, I will surely share the code after the competition.
wow! This is amazing!!
I like your code so much that I made a video for it : )
https://www.youtube.com/watch?v=CyYt5ufgkBA
Thank you again for sharing it!
Thanks buddy. Can you tell me how you added that voice? It's good and totally aligned.
LOL it's a secret :P
Thanks @everyone for making this the most upvoted post. This motivates me to share other approaches in the future too.
I wish i can upvote 10 times :)
you can use the code below to compute cv score. Very nice cv score!
train_df = pd.read_csv("/raid/ml/solar/Train.csv")
train_df = train_df.groupby("ID").agg({...})
model.load_state_dict(torch.load(f"best_model_fold{fold}.pth"))
for images, meta in tqdm(val_loader, desc=f"Predicting Fold {fold}"):
    ...
print(f"Validation MAE - Boil: {mae_boil:.4f}, Pan: {mae_pan:.4f}, Overall: {overall_mae:.4f}")
print(f"MAE: {mae:.2f}")
print(f"MAE (pan): {mae_pan:.2f}")
print(f"MAE (boil): {mae_boil:.2f}")

@snow what's the LB score for this CV?
I didn't submit. I think it is around 0.96 lb. please note that this is using all 5 folds. the original code trained 3 out of 5 folds. but the difference is small < 0.02 mae
The original code didn't set random state so it can vary. i had a way off score when I tried to check @zulo40's notebook.
I noticed that GPU utilization is not high due to reading the image on every __getitem__() call. Instead, I read all the images only once and cached them, and I got a 3x speedup in training. I also resized the images to 1280x720, so caching cost 13GB of system memory. You can tune this for your memory. Just be mindful that later there is a 512x512 random crop, so don't resize too much.
self.placement_map = {"roof": 0, "openspace": 1, "r_openspace": 2, "S-unknown": 3}
self.images = {}
print("Caching images...")
print(f"Warning: Unable to read image at {row['path']}")
print(f"Error loading image at index {idx}: {e}")
print(f"Successfully cached {len(self.images)} out of {len(dataframe)} images")