-
Notifications
You must be signed in to change notification settings - Fork 598
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Add] Support VIS evaluation for YouTube-VIS task. (#501)
* add eval support for VIS task * add eval support for VIS task * add eval support for VIS task * add eval api support * add ytvos support * rm pip install * add vis api * add vis api * add unit test * unit test * change to skip *.json * rollback * solve conflicting files * add extra unit test * add eval support for VIS task * add eval support for VIS task * Update build.yml * Update youtube_vis_dataset.py * Update youtubevis2coco.py * fix build.yml * fix build.yml * fix build.yml * updated some variable names * updated some variable names * updated some variable names * updated convert_vis_format * add some blanks * update code format * Delete youtube_vis.py wrong add * wrong add * wrong add * add some description * add some description * update Co-authored-by: Pengxiang Li <[email protected]> Co-authored-by: Pengxiang Li <[email protected]>
- Loading branch information
1 parent
55ca8d9
commit d231cfc
Showing
10 changed files
with
1,444 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
# Copyright (c) OpenMMLab. All rights reserved. | ||
import contextlib | ||
import io | ||
from collections import OrderedDict | ||
|
||
from mmcv.utils import print_log | ||
|
||
from .ytvis import YTVIS | ||
from .ytviseval import YTVISeval | ||
|
||
|
||
def eval_vis(test_results, vis_anns, logger=None):
    """Run the YouTube-VIS mask evaluation and collect the AP numbers.

    Args:
        test_results: Predictions of the VIS dataset; forwarded unchanged
            to ``YTVIS.loadRes``.
        vis_anns: Ground-truth annotations in YouTube-VIS format; forwarded
            unchanged to the ``YTVIS`` constructor.
        logger (logging.Logger | str | None): Logger handed to
            ``print_log`` for every message emitted here. Default: None.

    Returns:
        dict[str, float] | None: Ordered mapping with the six
        ``segm_mAP*`` values (rounded to three decimals) plus a
        ``segm_mAP_copypaste`` string. ``None`` when the ground truth
        holds no annotations.
    """
    gt_api = YTVIS(vis_anns)

    # Without any ground-truth instance there is nothing to score.
    if len(gt_api.anns) == 0:
        print_log('Annotations does not exist', logger=logger)
        return None

    det_api = gt_api.loadRes(test_results)
    all_vid_ids = gt_api.getVidIds()

    metric = 'segm'
    evaluator = YTVISeval(gt_api, det_api, metric)
    evaluator.params.vidIds = all_vid_ids
    evaluator.evaluate()
    evaluator.accumulate()

    # ``summarize`` writes its table to stdout; capture it so that it can be
    # routed through the logger instead.
    captured = io.StringIO()
    with contextlib.redirect_stdout(captured):
        evaluator.summarize()
    print_log('\n' + captured.getvalue(), logger=logger)

    # Position of every summary number inside ``evaluator.stats``.
    stat_index = {
        'mAP': 0,
        'mAP_50': 1,
        'mAP_75': 2,
        'mAP_s': 3,
        'mAP_m': 4,
        'mAP_l': 5,
        'AR@1': 6,
        'AR@10': 7,
        'AR@100': 8,
        'AR_s@100': 9,
        'AR_m@100': 10,
        'AR_l@100': 11
    }
    reported = ('mAP', 'mAP_50', 'mAP_75', 'mAP_s', 'mAP_m', 'mAP_l')

    eval_results = OrderedDict()
    for name in reported:
        raw_value = evaluator.stats[stat_index[name]]
        eval_results[f'{metric}_{name}'] = float(f'{raw_value:.3f}')

    # Space separated AP values, convenient for pasting into tables.
    eval_results[f'{metric}_mAP_copypaste'] = ' '.join(
        f'{value:.3f}' for value in evaluator.stats[:6])
    return eval_results
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,305 @@ | ||
# Copyright (c) Github URL | ||
# Copied from | ||
# https://github.com/youtubevos/cocoapi/blob/master/PythonAPI/pycocotools/ytvos.py | ||
__author__ = 'ychfan' | ||
# Interface for accessing the YouTubeVIS dataset. | ||
|
||
# The following API functions are defined: | ||
# YTVIS - YTVIS api class that loads YouTubeVIS annotation file | ||
# and prepare data structures. | ||
# decodeMask - Decode binary mask M encoded via run-length encoding. | ||
# encodeMask - Encode binary mask M using run-length encoding. | ||
# getAnnIds - Get ann ids that satisfy given filter conditions. | ||
# getCatIds - Get cat ids that satisfy given filter conditions. | ||
# getVidIds - Get vid ids that satisfy given filter conditions. | ||
# loadAnns - Load anns with the specified ids. | ||
# loadCats - Load cats with the specified ids. | ||
# loadVids - Load vids with the specified ids. | ||
# annToMask - Convert segmentation in an annotation to binary mask. | ||
# loadRes - Load algorithm results and create API for accessing them. | ||
|
||
# Microsoft COCO Toolbox. version 2.0 | ||
# Data, paper, and tutorials available at: http://mscoco.org/ | ||
# Code written by Piotr Dollar and Tsung-Yi Lin, 2014. | ||
# Licensed under the Simplified BSD License [see bsd.txt] | ||
|
||
import copy | ||
import itertools | ||
import json | ||
import sys | ||
import time | ||
from collections import defaultdict | ||
|
||
import numpy as np | ||
from pycocotools import mask as maskUtils | ||
|
||
PYTHON_VERSION = sys.version_info[0] | ||
|
||
|
||
def _isArrayLike(obj):
    """Return True when ``obj`` is both iterable and sized.

    Lists, tuples, sets and dict views qualify. Note that ``str`` also
    satisfies both checks.
    """
    return hasattr(obj, '__iter__') and hasattr(obj, '__len__')


class YTVIS:
    """Index over a YouTube-VIS style annotation dictionary.

    Attributes set by ``createIndex``:
        anns: annotation id -> annotation dict
        cats: category id -> category dict
        vids: video id -> video dict
        vidToAnns: video id -> list of annotation dicts
        catToVids: category id -> list of video ids (one entry per
            annotation, so ids may repeat)
    """

    def __init__(self, annotation_file=None):
        """Load the annotations and build the lookup tables.

        :param annotation_file (str | dict): path of a JSON annotation file
            or an already loaded annotation dict. With ``None`` an empty
            object is created and no index is built.
        :return:
        """
        self.dataset, self.anns, self.cats, self.vids = {}, {}, {}, {}
        self.vidToAnns, self.catToVids = defaultdict(list), defaultdict(list)
        if annotation_file is None:
            return

        print('loading annotations into memory...')
        tic = time.time()
        if isinstance(annotation_file, str):
            # Context manager so the handle is closed even if parsing fails.
            with open(annotation_file, 'r') as f:
                dataset = json.load(f)
        else:
            dataset = annotation_file
        assert isinstance(dataset, dict), \
            'annotation file format {} not supported'.format(type(dataset))
        print('Done (t={:0.2f}s)'.format(time.time() - tic))
        self.dataset = dataset
        self.createIndex()

    def createIndex(self):
        """(Re)build all lookup tables from ``self.dataset``.

        Sections missing from the dataset simply yield empty tables.
        """
        print('creating index...')
        anns, cats, vids = {}, {}, {}
        vidToAnns, catToVids = defaultdict(list), defaultdict(list)
        annotations = self.dataset.get('annotations')

        if 'annotations' in self.dataset:
            for ann in annotations:
                vidToAnns[ann['video_id']].append(ann)
                anns[ann['id']] = ann

        if 'videos' in self.dataset:
            for vid in self.dataset['videos']:
                vids[vid['id']] = vid

        if 'categories' in self.dataset:
            for cat in self.dataset['categories']:
                cats[cat['id']] = cat

        # The category -> videos table is only built when categories exist.
        if 'annotations' in self.dataset and 'categories' in self.dataset:
            for ann in annotations:
                catToVids[ann['category_id']].append(ann['video_id'])

        print('index created!')

        # create class members
        self.anns = anns
        self.vidToAnns = vidToAnns
        self.catToVids = catToVids
        self.vids = vids
        self.cats = cats

    def getAnnIds(self, vidIds=[], catIds=[], areaRng=[], iscrowd=None):
        """Get ann ids that satisfy given filter conditions. default skips that
        filter.

        The default lists are never mutated.

        :param vidIds (int array) : get anns for given vids
               catIds (int array) : get anns for given cats
               areaRng (float array) : get anns whose ``avg_area`` lies
                   strictly between ``areaRng[0]`` and ``areaRng[1]``
               iscrowd (boolean) : get anns for given crowd label
        :return: ids (int array) : integer array of ann ids
        """
        vidIds = vidIds if _isArrayLike(vidIds) else [vidIds]
        catIds = catIds if _isArrayLike(catIds) else [catIds]

        if len(vidIds) == 0:
            anns = self.dataset['annotations']
        else:
            # Unknown video ids are skipped instead of being inserted into
            # the defaultdict.
            per_video = [
                self.vidToAnns[vidId] for vidId in vidIds
                if vidId in self.vidToAnns
            ]
            anns = list(itertools.chain.from_iterable(per_video))
        if len(catIds) != 0:
            anns = [ann for ann in anns if ann['category_id'] in catIds]
        if len(areaRng) != 0:
            anns = [
                ann for ann in anns
                if areaRng[0] < ann['avg_area'] < areaRng[1]
            ]

        if iscrowd is not None:
            return [ann['id'] for ann in anns if ann['iscrowd'] == iscrowd]
        return [ann['id'] for ann in anns]

    def getCatIds(self, catNms=[], supNms=[], catIds=[]):
        """filtering parameters. default skips that filter.

        :param catNms (str array) : get cats for given cat names
        :param supNms (str array) : get cats for given supercategory names
        :param catIds (int array) : get cats for given cat ids
        :return: ids (int array) : integer array of cat ids
        """
        catNms = catNms if _isArrayLike(catNms) else [catNms]
        supNms = supNms if _isArrayLike(supNms) else [supNms]
        catIds = catIds if _isArrayLike(catIds) else [catIds]

        cats = self.dataset['categories']
        if len(catNms) != 0:
            cats = [cat for cat in cats if cat['name'] in catNms]
        if len(supNms) != 0:
            cats = [cat for cat in cats if cat['supercategory'] in supNms]
        if len(catIds) != 0:
            cats = [cat for cat in cats if cat['id'] in catIds]
        return [cat['id'] for cat in cats]

    def getVidIds(self, vidIds=[], catIds=[]):
        """Get vid ids that satisfy given filter conditions.

        :param vidIds (int array) : get vids for given ids
        :param catIds (int array) : get vids with all given cats
        :return: ids (int array) : integer array of vid ids
        """
        vidIds = vidIds if _isArrayLike(vidIds) else [vidIds]
        catIds = catIds if _isArrayLike(catIds) else [catIds]

        if len(vidIds) == len(catIds) == 0:
            return list(self.vids.keys())

        ids = set(vidIds)
        for i, catId in enumerate(catIds):
            if i == 0 and len(ids) == 0:
                # No explicit video filter: seed with the first category.
                ids = set(self.catToVids[catId])
            else:
                ids &= set(self.catToVids[catId])
        return list(ids)

    @staticmethod
    def _lookup(table, ids):
        """Fetch entries of ``table`` for an id collection or a single id.

        Returns ``None`` when ``ids`` is neither array-like nor an integer.
        """
        if _isArrayLike(ids):
            return [table[i] for i in ids]
        # numpy integers are accepted too; they hash like plain ints.
        if isinstance(ids, (int, np.integer)):
            return [table[ids]]
        return None

    def loadAnns(self, ids=[]):
        """Load anns with the specified ids.

        :param ids (int array) : integer ids specifying anns
        :return: anns (object array) : loaded ann objects
        """
        return self._lookup(self.anns, ids)

    def loadCats(self, ids=[]):
        """Load cats with the specified ids.

        :param ids (int array) : integer ids specifying cats
        :return: cats (object array) : loaded cat objects
        """
        return self._lookup(self.cats, ids)

    def loadVids(self, ids=[]):
        """Load vids with the specified ids.

        :param ids (int array) : integer ids specifying vid
        :return: vids (object array) : loaded vid objects
        """
        return self._lookup(self.vids, ids)

    def loadRes(self, resFile):
        """Load results and return a result api object.

        The result dicts are annotated in place with ``areas``, ``bboxes``
        (when absent), ``id``, ``avg_area`` and ``iscrowd``.

        :param resFile (str | list) : file name of a JSON result file or an
            already loaded list of result dicts. Any other type (numpy
            arrays included) fails the list assertion below.
        :return: res (obj) : result api object
        """
        res = YTVIS()
        res.dataset['videos'] = list(self.dataset['videos'])

        print('Loading and preparing results...')
        tic = time.time()
        if isinstance(resFile, str):
            with open(resFile) as f:
                anns = json.load(f)
        else:
            anns = resFile
        assert isinstance(anns, list), 'results in not an array of objects'
        annsVidIds = {ann['video_id'] for ann in anns}
        assert annsVidIds.issubset(self.getVidIds()), \
            'Results do not correspond to current coco set'
        # ``anns`` may be empty (no detections at all); guard the peek at
        # the first element.
        if anns and 'segmentations' in anns[0]:
            res.dataset['categories'] = copy.deepcopy(
                self.dataset['categories'])
            for ann_id, ann in enumerate(anns, start=1):
                ann['areas'] = []
                if 'bboxes' not in ann:
                    ann['bboxes'] = []
                for seg in ann['segmentations']:
                    # now only support compressed RLE format
                    # as segmentation results
                    if seg:
                        ann['areas'].append(maskUtils.area(seg))
                        # Only derive a box when none was supplied for
                        # this frame.
                        if len(ann['bboxes']) < len(ann['areas']):
                            ann['bboxes'].append(maskUtils.toBbox(seg))
                    else:
                        ann['areas'].append(None)
                        if len(ann['bboxes']) < len(ann['areas']):
                            ann['bboxes'].append(None)
                ann['id'] = ann_id
                # Average over the frames with a non-zero area.
                valid_areas = [a for a in ann['areas'] if a]
                if len(valid_areas) == 0:
                    ann['avg_area'] = 0
                else:
                    ann['avg_area'] = np.array(valid_areas).mean()
                ann['iscrowd'] = 0
        print('DONE (t={:0.2f}s)'.format(time.time() - tic))

        res.dataset['annotations'] = anns
        res.createIndex()
        return res

    def annToRLE(self, ann, frameId):
        """Convert the segmentation of one frame, which can be polygons or
        uncompressed RLE, to RLE.

        :param ann (dict) : annotation with ``video_id`` and
            ``segmentations``
        :param frameId (int) : index into ``ann['segmentations']``
        :return: rle : the segmentation as RLE
        """
        video = self.vids[ann['video_id']]
        h, w = video['height'], video['width']
        segm = ann['segmentations'][frameId]
        if isinstance(segm, list):
            # polygon -- a single object might consist of multiple parts
            # we merge all parts into one mask rle code
            rles = maskUtils.frPyObjects(segm, h, w)
            return maskUtils.merge(rles)
        if isinstance(segm['counts'], list):
            # uncompressed RLE
            return maskUtils.frPyObjects(segm, h, w)
        # already rle
        return segm

    def annToMask(self, ann, frameId):
        """Convert the segmentation of one frame, which can be polygons,
        uncompressed RLE, or RLE, to a binary mask.

        :param ann (dict) : annotation with ``video_id`` and
            ``segmentations``
        :param frameId (int) : index into ``ann['segmentations']``
        :return: binary mask (numpy 2D array)
        """
        rle = self.annToRLE(ann, frameId)
        return maskUtils.decode(rle)
Oops, something went wrong.