[Add] Support VIS evaluation for YouTube-VIS task. (#501)
* add eval support for VIS task

* add eval support for VIS task

* add eval support for VIS task

* add eval api support

* add ytvos support

* rm pip install

* add vis api

* add vis api

* add unit test

* unit test

* change to skip *.json

* rollback

* solve conflicting files

* add extra unit test

* add eval support for VIS task

* add eval support for VIS task

* Update build.yml

* Update youtube_vis_dataset.py

* Update youtubevis2coco.py

* fix build.yml

* fix build.yml

* fix build.yml

* updated some variable names

* updated some variable names

* updated some variable names

* updated convert_vis_format

* add some blanks

* update code format

* Delete youtube_vis.py

wrong add

* wrong add

* wrong add

* add some description

* add some description

* update

Co-authored-by: Pengxiang Li <[email protected]>
Co-authored-by: Pengxiang Li <[email protected]>
3 people authored Apr 26, 2022
1 parent 55ca8d9 commit d231cfc
Showing 10 changed files with 1,444 additions and 4 deletions.
3 changes: 2 additions & 1 deletion mmtrack/core/evaluation/__init__.py
@@ -4,8 +4,9 @@
 from .eval_sot_ope import eval_sot_ope
 from .eval_sot_vot import (bbox2region, eval_sot_accuracy_robustness,
                            eval_sot_eao)
+from .eval_vis import eval_vis

 __all__ = [
     'EvalHook', 'DistEvalHook', 'eval_mot', 'eval_sot_ope', 'bbox2region',
-    'eval_sot_eao', 'eval_sot_accuracy_robustness'
+    'eval_sot_eao', 'eval_sot_accuracy_robustness', 'eval_vis'
 ]
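
With this change, `eval_vis` is re-exported from `mmtrack.core.evaluation`. A minimal import check, as a sketch only, assuming a build of mmtrack that contains this commit is installed:

from mmtrack.core.evaluation import eval_vis

# Summary line of the docstring added in the next file.
print(eval_vis.__doc__.splitlines()[0])  # -> 'Evaluation on VIS metrics.'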
72 changes: 72 additions & 0 deletions mmtrack/core/evaluation/eval_vis.py
@@ -0,0 +1,72 @@
# Copyright (c) OpenMMLab. All rights reserved.
import contextlib
import io
from collections import OrderedDict

from mmcv.utils import print_log

from .ytvis import YTVIS
from .ytviseval import YTVISeval


def eval_vis(test_results, vis_anns, logger=None):
    """Evaluation on VIS metrics.

    Args:
        test_results (dict(list[dict])): Testing results of the VIS dataset.
        vis_anns (dict(list[dict])): The annotation in the format
            of YouTube-VIS.
        logger (logging.Logger | str | None): Logger used for printing
            related information during evaluation. Default: None.

    Returns:
        dict[str, float]: Evaluation results.
    """
    ytvis = YTVIS(vis_anns)

    if len(ytvis.anns) == 0:
        print_log('Annotations do not exist', logger=logger)
        return

    ytvis_dets = ytvis.loadRes(test_results)
    vid_ids = ytvis.getVidIds()

    iou_type = metric = 'segm'
    eval_results = OrderedDict()
    ytvisEval = YTVISeval(ytvis, ytvis_dets, iou_type)
    ytvisEval.params.vidIds = vid_ids
    ytvisEval.evaluate()
    ytvisEval.accumulate()

    # Save coco summarize print information to logger
    redirect_string = io.StringIO()
    with contextlib.redirect_stdout(redirect_string):
        ytvisEval.summarize()
    print_log('\n' + redirect_string.getvalue(), logger=logger)

    metric_items = ['mAP', 'mAP_50', 'mAP_75', 'mAP_s', 'mAP_m', 'mAP_l']
    coco_metric_names = {
        'mAP': 0,
        'mAP_50': 1,
        'mAP_75': 2,
        'mAP_s': 3,
        'mAP_m': 4,
        'mAP_l': 5,
        'AR@1': 6,
        'AR@10': 7,
        'AR@100': 8,
        'AR_s@100': 9,
        'AR_m@100': 10,
        'AR_l@100': 11
    }

    for metric_item in metric_items:
        key = f'{metric}_{metric_item}'
        val = float(f'{ytvisEval.stats[coco_metric_names[metric_item]]:.3f}')
        eval_results[key] = val

    ap = ytvisEval.stats[:6]
    eval_results[f'{metric}_mAP_copypaste'] = (
        f'{ap[0]:.3f} {ap[1]:.3f} {ap[2]:.3f} {ap[3]:.3f} '
        f'{ap[4]:.3f} {ap[5]:.3f}')
    return eval_results
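
For reference, a hedged sketch of how `eval_vis` is meant to be called; the JSON paths below are placeholders, not files added by this commit. The annotations are wrapped by `YTVIS`, the results go through `YTVIS.loadRes`, and the COCO-style summary is redirected to the logger:

import json

from mmtrack.core.evaluation import eval_vis

# YouTube-VIS style ground truth: a dict with 'videos', 'categories' and
# per-instance 'annotations' whose 'segmentations' are per-frame RLEs.
vis_anns = json.load(open('path/to/youtube_vis_annotations.json'))

# Predictions in the form YTVIS.loadRes asserts on: a list of dicts, each
# with 'video_id' and per-frame 'segmentations' (compressed RLE), plus
# 'score' and 'category_id' for scoring.
test_results = json.load(open('path/to/vis_results.json'))

eval_results = eval_vis(test_results, vis_anns)
# OrderedDict with keys such as 'segm_mAP', 'segm_mAP_50', ...,
# plus 'segm_mAP_copypaste'.
print(eval_results)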
305 changes: 305 additions & 0 deletions mmtrack/core/evaluation/ytvis.py
@@ -0,0 +1,305 @@
# Copyright (c) Github URL
# Copied from
# https://github.com/youtubevos/cocoapi/blob/master/PythonAPI/pycocotools/ytvos.py
__author__ = 'ychfan'
# Interface for accessing the YouTubeVIS dataset.

# The following API functions are defined:
#  YTVIS      - YTVIS api class that loads YouTubeVIS annotation file
#               and prepare data structures.
#  decodeMask - Decode binary mask M encoded via run-length encoding.
#  encodeMask - Encode binary mask M using run-length encoding.
#  getAnnIds  - Get ann ids that satisfy given filter conditions.
#  getCatIds  - Get cat ids that satisfy given filter conditions.
#  getImgIds  - Get img ids that satisfy given filter conditions.
#  loadAnns   - Load anns with the specified ids.
#  loadCats   - Load cats with the specified ids.
#  loadImgs   - Load imgs with the specified ids.
#  annToMask  - Convert segmentation in an annotation to binary mask.
#  loadRes    - Load algorithm results and create API for accessing them.

# Microsoft COCO Toolbox. version 2.0
# Data, paper, and tutorials available at: http://mscoco.org/
# Code written by Piotr Dollar and Tsung-Yi Lin, 2014.
# Licensed under the Simplified BSD License [see bsd.txt]

import copy
import itertools
import json
import sys
import time
from collections import defaultdict

import numpy as np
from pycocotools import mask as maskUtils

PYTHON_VERSION = sys.version_info[0]


def _isArrayLike(obj):
    return hasattr(obj, '__iter__') and hasattr(obj, '__len__')


class YTVIS:

    def __init__(self, annotation_file=None):
        """Constructor of Microsoft COCO helper class for reading and
        visualizing annotations.

        :param annotation_file (str | dict): location of annotation file or
            dict results.
        :param image_folder (str): location to the folder that hosts images.
        :return:
        """
        # load dataset
        self.dataset, self.anns, self.cats, self.vids = dict(), dict(), dict(
        ), dict()
        self.vidToAnns, self.catToVids = defaultdict(list), defaultdict(list)
        if annotation_file is not None:
            print('loading annotations into memory...')
            tic = time.time()
            if type(annotation_file) == str:
                dataset = json.load(open(annotation_file, 'r'))
            else:
                dataset = annotation_file
            assert type(
                dataset
            ) == dict, 'annotation file format {} not supported'.format(
                type(dataset))
            print('Done (t={:0.2f}s)'.format(time.time() - tic))
            self.dataset = dataset
            self.createIndex()

    def createIndex(self):
        # create index
        print('creating index...')
        anns, cats, vids = {}, {}, {}
        vidToAnns, catToVids = defaultdict(list), defaultdict(list)
        if 'annotations' in self.dataset:
            for ann in self.dataset['annotations']:
                vidToAnns[ann['video_id']].append(ann)
                anns[ann['id']] = ann

        if 'videos' in self.dataset:
            for vid in self.dataset['videos']:
                vids[vid['id']] = vid

        if 'categories' in self.dataset:
            for cat in self.dataset['categories']:
                cats[cat['id']] = cat

        if 'annotations' in self.dataset and 'categories' in self.dataset:
            for ann in self.dataset['annotations']:
                catToVids[ann['category_id']].append(ann['video_id'])

        print('index created!')

        # create class members
        self.anns = anns
        self.vidToAnns = vidToAnns
        self.catToVids = catToVids
        self.vids = vids
        self.cats = cats

    def getAnnIds(self, vidIds=[], catIds=[], areaRng=[], iscrowd=None):
        """Get ann ids that satisfy given filter conditions. default skips
        that filter.

        :param vidIds (int array): get anns for given vids
               catIds (int array): get anns for given cats
               areaRng (float array): get anns for given area range
               iscrowd (boolean): get anns for given crowd label
        :return: ids (int array): integer array of ann ids
        """
        vidIds = vidIds if _isArrayLike(vidIds) else [vidIds]
        catIds = catIds if _isArrayLike(catIds) else [catIds]

        if len(vidIds) == len(catIds) == len(areaRng) == 0:
            anns = self.dataset['annotations']
        else:
            if not len(vidIds) == 0:
                lists = [
                    self.vidToAnns[vidId] for vidId in vidIds
                    if vidId in self.vidToAnns
                ]
                anns = list(itertools.chain.from_iterable(lists))
            else:
                anns = self.dataset['annotations']
            anns = anns if len(catIds) == 0 else [
                ann for ann in anns if ann['category_id'] in catIds
            ]
            anns = anns if len(areaRng) == 0 else [
                ann for ann in anns if ann['avg_area'] > areaRng[0]
                and ann['avg_area'] < areaRng[1]
            ]
        if iscrowd is not None:
            ids = [ann['id'] for ann in anns if ann['iscrowd'] == iscrowd]
        else:
            ids = [ann['id'] for ann in anns]
        return ids

    def getCatIds(self, catNms=[], supNms=[], catIds=[]):
        """filtering parameters. default skips that filter.

        :param catNms (str array): get cats for given cat names
        :param supNms (str array): get cats for given supercategory names
        :param catIds (int array): get cats for given cat ids
        :return: ids (int array): integer array of cat ids
        """
        catNms = catNms if _isArrayLike(catNms) else [catNms]
        supNms = supNms if _isArrayLike(supNms) else [supNms]
        catIds = catIds if _isArrayLike(catIds) else [catIds]

        if len(catNms) == len(supNms) == len(catIds) == 0:
            cats = self.dataset['categories']
        else:
            cats = self.dataset['categories']
            cats = cats if len(catNms) == 0 else [
                cat for cat in cats if cat['name'] in catNms
            ]
            cats = cats if len(supNms) == 0 else [
                cat for cat in cats if cat['supercategory'] in supNms
            ]
            cats = cats if len(catIds) == 0 else [
                cat for cat in cats if cat['id'] in catIds
            ]
        ids = [cat['id'] for cat in cats]
        return ids

    def getVidIds(self, vidIds=[], catIds=[]):
        """Get vid ids that satisfy given filter conditions.

        :param vidIds (int array): get vids for given ids
        :param catIds (int array): get vids with all given cats
        :return: ids (int array): integer array of vid ids
        """
        vidIds = vidIds if _isArrayLike(vidIds) else [vidIds]
        catIds = catIds if _isArrayLike(catIds) else [catIds]

        if len(vidIds) == len(catIds) == 0:
            ids = self.vids.keys()
        else:
            ids = set(vidIds)
            for i, catId in enumerate(catIds):
                if i == 0 and len(ids) == 0:
                    ids = set(self.catToVids[catId])
                else:
                    ids &= set(self.catToVids[catId])
        return list(ids)

    def loadAnns(self, ids=[]):
        """Load anns with the specified ids.

        :param ids (int array): integer ids specifying anns
        :return: anns (object array): loaded ann objects
        """
        if _isArrayLike(ids):
            return [self.anns[id] for id in ids]
        elif type(ids) == int:
            return [self.anns[ids]]

    def loadCats(self, ids=[]):
        """Load cats with the specified ids.

        :param ids (int array): integer ids specifying cats
        :return: cats (object array): loaded cat objects
        """
        if _isArrayLike(ids):
            return [self.cats[id] for id in ids]
        elif type(ids) == int:
            return [self.cats[ids]]

    def loadVids(self, ids=[]):
        """Load vids with the specified ids.

        :param ids (int array): integer ids specifying vid
        :return: vids (object array): loaded vid objects
        """
        if _isArrayLike(ids):
            return [self.vids[id] for id in ids]
        elif type(ids) == int:
            return [self.vids[ids]]

    def loadRes(self, resFile):
        """Load result file and return a result api object.

        :param resFile (str): file name of result file
        :return: res (obj): result api object
        """
        res = YTVIS()
        res.dataset['videos'] = [img for img in self.dataset['videos']]

        print('Loading and preparing results...')
        tic = time.time()
        if type(resFile) == str or (PYTHON_VERSION == 2
                                    and type(resFile) == str):
            anns = json.load(open(resFile))
        elif type(resFile) == np.ndarray:
            anns = self.loadNumpyAnnotations(resFile)
        else:
            anns = resFile
        assert type(anns) == list, 'results is not an array of objects'
        annsVidIds = [ann['video_id'] for ann in anns]
        assert set(annsVidIds) == (set(annsVidIds) & set(self.getVidIds())), \
            'Results do not correspond to current coco set'
        if 'segmentations' in anns[0]:
            res.dataset['categories'] = copy.deepcopy(
                self.dataset['categories'])
            for id, ann in enumerate(anns):
                ann['areas'] = []
                if 'bboxes' not in ann:
                    ann['bboxes'] = []
                for seg in ann['segmentations']:
                    # now only support compressed RLE format
                    # as segmentation results
                    if seg:
                        ann['areas'].append(maskUtils.area(seg))
                        if len(ann['bboxes']) < len(ann['areas']):
                            ann['bboxes'].append(maskUtils.toBbox(seg))
                    else:
                        ann['areas'].append(None)
                        if len(ann['bboxes']) < len(ann['areas']):
                            ann['bboxes'].append(None)
                ann['id'] = id + 1
                l_ori = [a for a in ann['areas'] if a]
                if len(l_ori) == 0:
                    ann['avg_area'] = 0
                else:
                    ann['avg_area'] = np.array(l_ori).mean()
                ann['iscrowd'] = 0
        print('DONE (t={:0.2f}s)'.format(time.time() - tic))

        res.dataset['annotations'] = anns
        res.createIndex()
        return res

    def annToRLE(self, ann, frameId):
        """Convert annotation which can be polygons, uncompressed RLE to RLE.

        :return: rle (dict): run-length encoding of the mask
        """
        t = self.vids[ann['video_id']]
        h, w = t['height'], t['width']
        segm = ann['segmentations'][frameId]
        if type(segm) == list:
            # polygon -- a single object might consist of multiple parts
            # we merge all parts into one mask rle code
            rles = maskUtils.frPyObjects(segm, h, w)
            rle = maskUtils.merge(rles)
        elif type(segm['counts']) == list:
            # uncompressed RLE
            rle = maskUtils.frPyObjects(segm, h, w)
        else:
            # rle
            rle = segm
        return rle

    def annToMask(self, ann, frameId):
        """Convert annotation which can be polygons, uncompressed RLE, or RLE
        to binary mask.

        :return: binary mask (numpy 2D array)
        """
        rle = self.annToRLE(ann, frameId)
        m = maskUtils.decode(rle)
        return m
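
Taken together, the class mirrors the pycocotools workflow but indexes by video instead of image. A brief usage sketch, assuming a YouTube-VIS style annotation file is available locally (the path is a placeholder):

from mmtrack.core.evaluation.ytvis import YTVIS

ytvis = YTVIS('path/to/youtube_vis_annotations.json')

vid_ids = ytvis.getVidIds()                    # all video ids in the file
ann_ids = ytvis.getAnnIds(vidIds=vid_ids[:1])  # instances of the first video
anns = ytvis.loadAnns(ann_ids)

# Frames in which an instance is absent are stored as None in
# 'segmentations', so guard before decoding.
if anns and anns[0]['segmentations'][0] is not None:
    mask = ytvis.annToMask(anns[0], frameId=0)  # numpy 2D array of 0/1
    print(mask.shape)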