It seems that MAFA dataset has many noisy or missing label data. #1

developer0hye · 2021-03-22T05:20:01Z

Noisy data example

Missing label data example

Refer to the girl on the left.

Hi, Ehsan!

I have a problem with the MAFA dataset.

The code below is what I used to visualize the data in MAFA.

I based it on this project's code.

Is this a problem with the dataset or with the code?

import scipy.io

import os
import argparse

import cv2

class MAFAReader():
    """Load a MAFA annotation ``.mat`` file and display its labelled images.

    The split is chosen from the annotation file name: a name containing
    "Train" reads the ``label_train`` variable, a name containing "Test"
    reads ``LabelTest``.  ``read_mat`` shows one sample in an OpenCV window
    with every annotated box drawn on it.
    """

    def __init__(self, base_dir, annotation_file):
        """Load ``annotation_file`` and record which split it describes.

        Args:
            base_dir: Dataset root.  Images are read from
                ``<base_dir>/train-images/images`` or
                ``<base_dir>/test-images/images``.
            annotation_file: Path of the ``.mat`` label file.

        Raises:
            SystemExit: with a non-zero status when neither "Train" nor
                "Test" can be found in the annotation path.
        """
        self.base_dir = base_dir
        self.annotation_file = annotation_file
        self.data = scipy.io.loadmat(self.annotation_file)

        self.train = self._split_from_path(annotation_file)
        if self.train is None:
            # The previous bare ``exit()`` ended the process with status 0,
            # which made this failure look like a success to the caller.
            raise SystemExit(
                "Error: cannot tell whether %r holds train or test labels "
                "(expected 'Train' or 'Test' in the file name)"
                % (annotation_file,)
            )

        label_key = "label_train" if self.train else "LabelTest"
        self.len_dataset = len(self.data[label_key][0])

        print("annotation_file: ", self.annotation_file)
        print("self.len_dataset: ", self.len_dataset)

    @staticmethod
    def _split_from_path(annotation_file):
        """Return True for train labels, False for test labels, else None."""
        path_text = os.fspath(annotation_file)
        # Inspect the file name first so that a dataset root such as
        # "./Training/" cannot make the test labels look like the train
        # split.  Fall back to the whole path so files whose own name has no
        # marker keep being accepted exactly as before.
        for candidate in (os.path.basename(path_text), path_text):
            if "Train" in candidate:
                return True
            if "Test" in candidate:
                return False
        return None

    @staticmethod
    def _train_category(occ_type, occ_degree):
        """Map train columns 12 and 13 to 'Mask', 'No-Mask' or None."""
        if occ_type != 3 and occ_degree > 2:
            return 'Mask'
        if occ_type == 3 and occ_degree < 2:
            return 'No-Mask'
        # Every other combination (including occ_degree == 2) is left
        # uncategorised, as in the original rules.
        return None

    @staticmethod
    def _test_category(face_type, occ_type, occ_degree):
        """Map test columns 4, 9 and 10 to 'Mask', 'No-Mask' or None."""
        if face_type == 1 and occ_type != 3 and occ_degree > 2:
            return 'Mask'
        if face_type == 2:
            return 'No-Mask'
        return None

    def read_mat(self, idx):
        """Display sample ``idx`` with its boxes and return the kept labels.

        Every annotated box is drawn on the image, whether or not it was
        given a category.  The call blocks in ``cv2.waitKey(0)`` until a key
        is pressed.

        Args:
            idx: Index of the sample inside the loaded annotation array.

        Returns:
            A ``(categories, bboxes)`` tuple of parallel lists: the
            'Mask' / 'No-Mask' names and the matching ``[x1, y1, x2, y2]``
            boxes of the faces that satisfied the category rules.  Earlier
            versions built these lists but returned nothing.

        Raises:
            FileNotFoundError: when OpenCV cannot read the image file.
        """
        if self.train:
            record = self.data["label_train"][0][idx]
            name_field, rows = record[1], record[2]
            image_dir = "train-images"
        else:
            record = self.data["LabelTest"][0][idx]
            name_field, rows = record[0], record[1]
            image_dir = "test-images"

        # The name field prints like "['xxx.jpg']"; strip the wrapper chars.
        image_name = str(name_field).strip("['']")
        image_path = os.path.join(self.base_dir, image_dir, "images", image_name)
        if self.train:
            print(image_path)

        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread signals failure by returning None; without this
            # check the error only surfaced later inside cv2.rectangle.
            raise FileNotFoundError("could not read image: %s" % image_path)

        categories = []
        bboxes = []
        for row in rows:
            # The first four columns are used as x, y, width, height.
            # Converting to Python ints gives cv2 plain integers and keeps
            # the additions exact should the .mat use a narrow integer dtype
            # (NOTE(review): stored dtype not visible here - confirm).
            x, y, w, h = (int(value) for value in row[:4])
            bbox = [x, y, x + w, y + h]

            if self.train:
                occ_type, occ_degree = row[12], row[13]
                print("_bbox_label: ", row)
                print("_category_id: ", occ_type)
                print("_occulution_degree: ", occ_degree)
                print("bbox: ", bbox)
                category_name = self._train_category(occ_type, occ_degree)
            else:
                category_name = self._test_category(row[4], row[9], row[10])

            if category_name is not None:
                categories.append(category_name)
                bboxes.append(bbox)

            cv2.rectangle(img, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), thickness=2)

        cv2.imshow("img", img)
        cv2.waitKey(0)
        return categories, bboxes
             
if __name__ == '__main__':
    # Script entry point: open the train and test annotation files found
    # under --base-dir and display their samples one window at a time.
    arg_parser = argparse.ArgumentParser(description='MAFA2YOLO')
    arg_parser.add_argument('--base-dir', default="./", type=str)
    cli_args = arg_parser.parse_args()
    dataset_root = cli_args.base_dir

    train_label_path = os.path.join(dataset_root, 'MAFA-Label-Train/LabelTrainAll.mat')
    training_set = MAFAReader(base_dir=dataset_root, annotation_file=train_label_path)

    # Only the first training sample is shown (when there is one).
    if training_set.len_dataset > 0:
        training_set.read_mat(idx=0)

    test_label_path = os.path.join(dataset_root, 'MAFA-Label-Test/LabelTestAll.mat')
    test_set = MAFAReader(base_dir=dataset_root, annotation_file=test_label_path)

    # Every test sample is shown in turn.
    sample_idx = 0
    while sample_idx < test_set.len_dataset:
        test_set.read_mat(idx=sample_idx)
        sample_idx += 1

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

It seems that MAFA dataset has many noisy or missing label data. #1

It seems that MAFA dataset has many noisy or missing label data. #1

developer0hye commented Mar 22, 2021 •

edited

Loading

It seems that MAFA dataset has many noisy or missing label data. #1

It seems that MAFA dataset has many noisy or missing label data. #1

Comments

developer0hye commented Mar 22, 2021 • edited Loading

Noisy data example

Missing label data example

developer0hye commented Mar 22, 2021 •

edited

Loading