Hello @MuhammadQasimShabbirAhmad, on validation I get an mAP of 0.7, but when I submit to Zindi I get a very low score that isn't improving. Am I doing something wrong?
I think your model is probably overfitting, which can happen for many reasons. Could you share your code? I will look through it thoroughly to see what is causing the score to drop, i.e. what is causing the overfitting.
This is my code, where I do not drop the negative class but still got a score of 82 by fine-tuning YOLOv11m:
!pip install ultralytics
!pip install tqdm
import pandas as pd
import os
from pathlib import Path
import shutil
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm
import cv2
import yaml
import matplotlib.pyplot as plt
from ultralytics import YOLO
import multiprocessing
# Set the data directory
DATA_DIR = Path('/kaggle/input/lacuna-malaria-detection-dataset')
# Folder holding the image files (taken from the path used originally)
IMAGES_DIR = DATA_DIR / 'images'

# Load train and test files
train = pd.read_csv(DATA_DIR / 'Train.csv')
test = pd.read_csv(DATA_DIR / 'Test.csv')
ss = pd.read_csv(DATA_DIR / 'SampleSubmission.csv')

# Add an image_path column.
# The path is joined with the `/` operator instead of string concatenation:
# `'.../images' + x` dropped the separator and produced `.../imagesid_x.jpg`.
# Leading slashes are stripped so that an ID written as '/id_x.jpg' cannot
# reset the joined path to the filesystem root.
train['image_path'] = [IMAGES_DIR / str(x).lstrip('/') for x in train.Image_ID]
test['image_path'] = [IMAGES_DIR / str(x).lstrip('/') for x in test.Image_ID]

# Map string classes to integers (label encoding targets)
train['class_id'] = train['class'].map({'Trophozoite': 0, 'NEG': 1, 'WBC': 2})

# Split data into training and validation.
# The split is done on one row per image (the first annotation row of each
# Image_ID) so that all boxes of an image end up in the same split; the
# stratification therefore uses the class of that first row.
train_unique_imgs_df = train.drop_duplicates(subset=['Image_ID'], ignore_index=True)
X_train, X_val = train_test_split(
    train_unique_imgs_df,
    test_size=0.25,
    stratify=train_unique_imgs_df['class'],
    random_state=42,
)
# Expand back to every annotation row of the selected images; `.copy()` makes
# the two frames independent of `train` instead of slices of it.
X_train = train[train.Image_ID.isin(X_train.Image_ID)].copy()
X_val = train[train.Image_ID.isin(X_val.Image_ID)].copy()

# Check the shapes of training and validation data
print(X_train.shape, X_val.shape)
# Working folders: one images/ and one labels/ folder per split
TRAIN_IMAGES_DIR = Path('/kaggle/working/train/images')
VAL_IMAGES_DIR = Path('/kaggle/working/val/images')
TEST_IMAGES_DIR = Path('/kaggle/working/test/images')
TRAIN_LABELS_DIR = Path('/kaggle/working/train/labels')
VAL_LABELS_DIR = Path('/kaggle/working/val/labels')
TEST_LABELS_DIR = Path('/kaggle/working/test/labels')

# Recreate every folder from scratch so leftovers of a previous run are removed
for DIR in (
    TRAIN_IMAGES_DIR,
    VAL_IMAGES_DIR,
    TEST_IMAGES_DIR,
    TRAIN_LABELS_DIR,
    VAL_LABELS_DIR,
    TEST_LABELS_DIR,
):
    if DIR.exists():
        shutil.rmtree(DIR)
    DIR.mkdir(parents=True, exist_ok=True)

# Copy each split's images (one copy per distinct path) into its folder,
# keeping the original file name
for frame, destination in (
    (X_train, TRAIN_IMAGES_DIR),
    (X_val, VAL_IMAGES_DIR),
    (test, TEST_IMAGES_DIR),
):
    for img in tqdm(frame.image_path.unique()):
        shutil.copy(img, destination / img.name)
# Function to convert the bounding boxes to YOLO format and save them
def save_yolo_annotation(row):
    """Append one bounding box, in YOLO text format, to its image's label file.

    The label file is ``<output_dir>/<image stem>.txt`` and is opened in append
    mode, so calling this once per annotation row yields one line per box.

    Args:
        row: Mapping with the keys ``image_path``, ``class_id``,
            ``output_dir``, ``ymin``, ``xmin``, ``ymax`` and ``xmax``. The box
            coordinates are divided by the image size, i.e. they are treated
            as pixel values.

    Raises:
        ValueError: If OpenCV cannot read the image at ``image_path``.
    """
    image_path, class_id, output_dir = row['image_path'], row['class_id'], row['output_dir']

    # The image is opened only to obtain its size for the normalisation below
    img = cv2.imread(str(image_path))
    if img is None:
        raise ValueError(f"Could not read image from path: {image_path}")
    height, width = img.shape[:2]

    label_file = Path(output_dir) / f"{Path(image_path).stem}.txt"
    ymin, xmin, ymax, xmax = row['ymin'], row['xmin'], row['ymax'], row['xmax']

    # Normalize the coordinates: box centre and size as fractions of the image
    x_center = (xmin + xmax) / 2 / width
    y_center = (ymin + ymax) / 2 / height
    bbox_width = (xmax - xmin) / width
    bbox_height = (ymax - ymin) / height

    with open(label_file, 'a') as f:
        # The five fields must be separated by spaces; written back-to-back
        # (as before) they merge into one unparseable token.
        f.write(f"{class_id} {x_center:.6f} {y_center:.6f} {bbox_width:.6f} {bbox_height:.6f}\n")
# Parallelize the annotation saving process
def process_dataset(dataframe, output_dir):
    """Write a YOLO label line for every row of ``dataframe`` using a process pool.

    Args:
        dataframe: Annotation rows; each row is converted to a dict and passed
            to ``save_yolo_annotation``.
        output_dir: Folder in which the label files are written.
    """
    # Attach the destination on a copy (`assign` returns a new frame) so the
    # caller's DataFrame does not silently gain an `output_dir` column.
    records = dataframe.assign(output_dir=output_dir).to_dict('records')
    with multiprocessing.Pool() as pool:
        # `imap` is lazy: wrapping it in list() drains it so every row is
        # processed while tqdm reports progress.
        list(tqdm(pool.imap(save_yolo_annotation, records), total=len(records)))
# Save train and validation labels to their respective dirs
process_dataset(X_train, TRAIN_LABELS_DIR)
process_dataset(X_val, VAL_LABELS_DIR)

# Create a data.yaml file required by YOLO.
# The position of a name in `names` is the class index used in the label
# files, so the list is ordered by `class_id`. `train['class'].unique()`
# returns first-appearance order, which can differ from the class_id mapping
# and would attach the wrong class names to predictions.
# NOTE(review): assumes every class id occurs at least once in `train`.
class_names = (
    train[['class_id', 'class']]
    .drop_duplicates()
    .sort_values('class_id')['class']
    .tolist()
)
num_classes = len(class_names)
data_yaml = {
    'train': str(TRAIN_IMAGES_DIR),
    'val': str(VAL_IMAGES_DIR),
    'nc': num_classes,
    'names': class_names,
}

# Save the data.yaml file
yaml_path = Path('data.yaml')
with open(yaml_path, 'w') as file:
    yaml.dump(data_yaml, file, default_flow_style=False)
# Load a YOLO pretrained model
import torch  # imported here because it is only needed to choose the device

# `device` was used below without ever being defined; use the first GPU when
# one is available and fall back to the CPU otherwise.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
model = YOLO('yolov8m.pt').to(device)

# Fine-tune model to our data
model.train(
    data='data.yaml',  # Path to the dataset configuration
    epochs=10,         # Number of epochs
    imgsz=1024,        # Image size
    batch=16,          # Batch size
    device=device,     # (the comma after this argument was missing)
    patience=3,
)

# Validate the model on the validation set
model.val()
# Make predictions on test images and save them to a CSV file
image_files = os.listdir(TEST_IMAGES_DIR)
all_data = []

# Number of images for which the model returned no box. It is initialised once,
# before the loop; resetting it inside the loop (as before) meant the final
# value could only ever be 0 or 1.
incorrect_prediction = 0

for image_file in tqdm(image_files):
    img_path = os.path.join(TEST_IMAGES_DIR, image_file)
    results = model(img_path)

    boxes = results[0].boxes.xyxy.tolist()
    classes = results[0].boxes.cls.tolist()
    confidences = results[0].boxes.conf.tolist()
    names = results[0].names

    if not boxes:
        # No detection: emit a placeholder NEG row so the image still appears
        # in the submission
        incorrect_prediction += 1
        all_data.append({
            'Image_ID': image_file,
            'class': 'NEG',
            'confidence': 1.0,
            'ymin': 0,
            'xmin': 0,
            'ymax': 0,
            'xmax': 0,
        })
        continue

    # One submission row per detected box; boxes are (x1, y1, x2, y2)
    for box, cls, conf in zip(boxes, classes, confidences):
        x1, y1, x2, y2 = box
        detected_class = names[int(cls)]
        all_data.append({
            'Image_ID': image_file,
            'class': detected_class,
            'confidence': conf,
            'ymin': y1,
            'xmin': x1,
            'ymax': y2,
            'xmax': x2,
        })

print(f'========== Total Incorrect predction or dummy prediction {incorrect_prediction}================')

# Convert the results to a DataFrame and save it.
# The columns are listed explicitly so the CSV keeps its header even when
# `all_data` is empty.
sub = pd.DataFrame(
    all_data,
    columns=['Image_ID', 'class', 'confidence', 'ymin', 'xmin', 'ymax', 'xmax'],
)
sub.to_csv('/kaggle/working/benchmark_submission.csv', index=False)
Actually, when I published this notebook I was using an older version of YOLO, YOLOv8, but the latest version is YOLOv11, which is what I have set here. You can use this notebook as it is; you only have to make one small change: model = YOLO('yolov8m.pt') → model = YOLO('yolo11m.pt') (the Ultralytics weight files for this version are named without the "v"). YOLOv11 comes in five variants: n (nano), s (small), m (medium), l (large) and x (extra large).
Send me the error you are getting.
Okay
How can I send it?
I will guide you.
Copy the error from the notebook output and send it to me.
Hello @MuhammadQasimShabbirAhmad, on validation I get an mAP of 0.7, but when I submit to Zindi I get a very low score that isn't improving. Am I doing something wrong?
I think your model is probably overfitting, which can happen for many reasons. Could you share your code? I will look through it thoroughly to see what is causing the score to drop, i.e. what is causing the overfitting.
Why not try the YOLOv11m version? I got a score of 82 when I fine-tuned it. Here is the link to my notebook on Kaggle: https://www.kaggle.com/code/muhammadqasimshabbir/zindi1-2ghana-crop-disease/
https://www.kaggle.com/code/muhammadqasimshabbir/zindilacuna-malaria-detection-challenge/ (this is the notebook for this competition)
And should we drop the NEG class for training?
This is my code, where I do not drop the negative class but still got a score of 82 by fine-tuning YOLOv11m:
!pip install ultralytics !pip install tqdm import pandas as pd import os from pathlib import Path import shutil from sklearn.model_selection import train_test_split from tqdm.notebook import tqdm import cv2 import yaml import matplotlib.pyplot as plt from ultralytics import YOLO import multiprocessing # Set the data directory DATA_DIR = Path('/kaggle/input/lacuna-malaria-detection-dataset') # Load train and test files train = pd.read_csv(DATA_DIR / 'Train.csv') test = pd.read_csv(DATA_DIR / 'Test.csv') ss = pd.read_csv(DATA_DIR / 'SampleSubmission.csv') # Add an image_path column train['image_path'] = [Path('/kaggle/input/lacuna-malaria-detection-dataset/images' + x) for x in train.Image_ID] test['image_path'] = [Path('/kaggle/input/lacuna-malaria-detection-dataset/images' + x) for x in test.Image_ID] # Map string classes to integers (label encoding targets) train['class_id'] = train['class'].map({'Trophozoite': 0, 'NEG': 1, 'WBC': 2}) # Split data into training and validation train_unique_imgs_df = train.drop_duplicates(subset=['Image_ID'], ignore_index=True) X_train, X_val = train_test_split(train_unique_imgs_df, test_size=0.25, stratify=train_unique_imgs_df['class'], random_state=42) X_train = train[train.Image_ID.isin(X_train.Image_ID)] X_val = train[train.Image_ID.isin(X_val.Image_ID)] # Check the shapes of training and validation data print(X_train.shape, X_val.shape) # Define directories for images and labels TRAIN_IMAGES_DIR = Path('/kaggle/working/train/images') VAL_IMAGES_DIR = Path('/kaggle/working/val/images') TEST_IMAGES_DIR = Path('/kaggle/working/test/images') TRAIN_LABELS_DIR = Path('/kaggle/working/train/labels') VAL_LABELS_DIR = Path('/kaggle/working/val/labels') TEST_LABELS_DIR = Path('/kaggle/working/test/labels') # Create necessary directories for DIR in [TRAIN_IMAGES_DIR, VAL_IMAGES_DIR, TEST_IMAGES_DIR, TRAIN_LABELS_DIR, VAL_LABELS_DIR, TEST_LABELS_DIR]: if DIR.exists(): shutil.rmtree(DIR) DIR.mkdir(parents=True, exist_ok=True) # Copy train, 
val, and test images to their respective dirs for img in tqdm(X_train.image_path.unique()): shutil.copy(img, TRAIN_IMAGES_DIR / img.parts[-1]) for img in tqdm(X_val.image_path.unique()): shutil.copy(img, VAL_IMAGES_DIR / img.parts[-1]) for img in tqdm(test.image_path.unique()): shutil.copy(img, TEST_IMAGES_DIR / img.parts[-1]) # Function to convert the bounding boxes to YOLO format and save them def save_yolo_annotation(row): image_path, class_id, output_dir = row['image_path'], row['class_id'], row['output_dir'] img = cv2.imread(str(image_path)) if img is None: raise ValueError(f"Could not read image from path: {image_path}") height, width, _ = img.shape label_file = Path(output_dir) / f"{Path(image_path).stem}.txt" ymin, xmin, ymax, xmax = row['ymin'], row['xmin'], row['ymax'], row['xmax'] # Normalize the coordinates x_center = (xmin + xmax) / 2 / width y_center = (ymin + ymax) / 2 / height bbox_width = (xmax - xmin) / width bbox_height = (ymax - ymin) / height with open(label_file, 'a') as f: f.write(f"{class_id} {x_center:.6f} {y_center:.6f} {bbox_width:.6f} {bbox_height:.6f}\n") # Parallelize the annotation saving process def process_dataset(dataframe, output_dir): dataframe['output_dir'] = output_dir with multiprocessing.Pool() as pool: list(tqdm(pool.imap(save_yolo_annotation, dataframe.to_dict('records')), total=len(dataframe))) # Save train and validation labels to their respective dirs process_dataset(X_train, TRAIN_LABELS_DIR) process_dataset(X_val, VAL_LABELS_DIR) # Create a data.yaml file required by YOLO class_names = train['class'].unique().tolist() num_classes = len(class_names) data_yaml = { 'train': str(TRAIN_IMAGES_DIR), 'val': str(VAL_IMAGES_DIR), 'nc': num_classes, 'names': class_names } # Save the data.yaml file yaml_path = Path('data.yaml') with open(yaml_path, 'w') as file: yaml.dump(data_yaml, file, default_flow_style=False) # Load a YOLO pretrained model model = YOLO('yolov8m.pt').to(device)
# Fine-tune model to our data model.train( data='data.yaml', # Path to the dataset configuration epochs=10, # Number of epochs imgsz=1024, # Image size batch=16, # Batch size # device='0', device=device patience=3 ) # Validate the model on the validation set model.val() # Make predictions on test images and save them to a CSV file image_files = os.listdir(TEST_IMAGES_DIR) all_data = [] for image_file in tqdm(image_files): img_path = os.path.join(TEST_IMAGES_DIR, image_file) results = model(img_path) boxes = results[0].boxes.xyxy.tolist() classes = results[0].boxes.cls.tolist() confidences = results[0].boxes.conf.tolist() names = results[0].names incorrect_prediction = 0 if not boxes: incorrect_prediction = incorrect_prediction + 1 all_data.append({ 'Image_ID': image_file, 'class': 'NEG', 'confidence': 1.0, 'ymin': 0, 'xmin': 0, 'ymax': 0, 'xmax': 0 }) else: for box, cls, conf in zip(boxes, classes, confidences): x1, y1, x2, y2 = box detected_class = names[int(cls)] all_data.append({ 'Image_ID': image_file, 'class': detected_class, 'confidence': conf, 'ymin': y1, 'xmin': x1, 'ymax': y2, 'xmax': x2 }) print(f'========== Total Incorrect predction or dummy prediction {incorrect_prediction}================') # Convert the results to a DataFrame and save it sub = pd.DataFrame(all_data) sub.to_csv('/kaggle/working/benchmark_submission.csv', index=False)
map({'Trophozoite': 0, 'NEG': 1, 'WBC': 2}) #
Here
But here it seems like you used yolov8m, not YOLOv11m.
Actually, when I published this notebook I was using an older version of YOLO, YOLOv8, but the latest version is YOLOv11, which is what I have set here. You can use this notebook as it is; you only have to make one small change: model = YOLO('yolov8m.pt') → model = YOLO('yolo11m.pt') (the Ultralytics weight files for this version are named without the "v"). YOLOv11 comes in five variants: n (nano), s (small), m (medium), l (large) and x (extra large).