Evaluate Model
Evaluating tracking performance takes two steps: first generate predictions on the test dataset with tools/test_video.py, then use KITTI's evaluation tool to evaluate the tracking results.
get prediction result
test_video.py
import argparse
import torch
import mmcv
import cv2
import os
import numpy as np
from tqdm import tqdm
from mmcv.runner import load_checkpoint, parallel_test, obj_from_dict
from mmcv.parallel import scatter, collate, MMDataParallel
from mmdet import datasets
from mmdet.core import results2json_videoseg, ytvos_eval
from mmdet.datasets import build_dataloader
from mmdet.models import build_detector, detectors


def single_test(model, data_loader, show=False, save_path=''):
    model.eval()
    results = []
    dataset = data_loader.dataset
    prog_bar = mmcv.ProgressBar(len(dataset))
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            result = model(return_loss=False, rescale=not show, **data)
        results.append(result)
        if show:
            model.module.show_result(data, result, dataset.img_norm_cfg,
                                     dataset=dataset.CLASSES,
                                     save_vis=True,
                                     save_path=save_path,
                                     is_video=True)
        batch_size = data['img'][0].size(0)
        for _ in range(batch_size):
            prog_bar.update()
    return results


def _data_func(data, device_id):
    data = scatter(collate([data], samples_per_gpu=1), [device_id])[0]
    return dict(return_loss=False, rescale=True, **data)


def parse_args():
    parser = argparse.ArgumentParser(description='MMDet test detector')
    parser.add_argument('config', help='test config file path')
    parser.add_argument('checkpoint', help='checkpoint file')
    parser.add_argument(
        '--save_path',
        type=str,
        help='path to save visual result')
    parser.add_argument(
        '--gpus', default=1, type=int, help='GPU number used for testing')
    parser.add_argument(
        '--proc_per_gpu',
        default=1,
        type=int,
        help='Number of processes per GPU')
    parser.add_argument('--out', help='output result file')
    parser.add_argument('--load_result',
                        action='store_true',
                        help='whether to load existing result')
    parser.add_argument(
        '--eval',
        type=str,
        nargs='+',
        choices=['bbox', 'segm'],
        help='eval types')
    parser.add_argument('--show', action='store_true', help='show results')
    args = parser.parse_args()
    return args


def mkdir_if_missing(dir):
    os.makedirs(dir, exist_ok=True)


def results2kitti(dataset, results, outpath, sequencesList, show):
    local2globalId = {}
    mkdir_if_missing(outpath)
    save_dict = {}
    label_map = ['Pedestrian', 'Car']
    for idx in range(len(dataset)):
        # assume results is ordered.
        # The prediction results have obj_id starting from 0 for each sequence,
        # while KITTI expects non-overlapping instance ids across sequences.
        # vid_id ranges from 0 to number of videos - 1.
        vid_id, frame_id = dataset.img_ids[idx]
        det, seg = results[idx]
        for obj_id in det:
            instance_key = "%d_%d" % (vid_id, obj_id)
            instance_id = local2globalId.get(instance_key, -1)
            if instance_id == -1:
                local2globalId[instance_key] = len(local2globalId.keys()) + 1
                instance_id = local2globalId[instance_key]
            predict = det[obj_id]['bbox']
            label = label_map[det[obj_id]['label']]
            score = predict[4]
            left, top, right, bottom = list(map(float, predict[0:-1]))
            formatedStr = "%d %s %s 0 0 0 %f %f %f %f 0 0 0 0 0 0 %s \n" % (
                frame_id, instance_id, label, left, top, right, bottom, score)
            temp_dict = save_dict.get(vid_id, [])
            temp_dict.append(formatedStr)
            save_dict[vid_id] = temp_dict
    for sequence in save_dict.keys():
        save_file_path = os.path.join(outpath, '%04d.txt' % (sequencesList[sequence]))
        with open(save_file_path, "w", encoding='utf-8') as dest:
            print("save labels for sequence %s to %s" % (sequencesList[sequence], save_file_path))
            for line in save_dict[sequence]:
                dest.write(line)


def main():
    args = parse_args()
    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
        raise ValueError('The output file must be a pkl file.')

    cfg = mmcv.Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    dataset = obj_from_dict(cfg.data.test, datasets, dict(test_mode=True))
    assert args.gpus == 1
    model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
    load_checkpoint(model, args.checkpoint)
    model = MMDataParallel(model, device_ids=[0])

    data_loader = build_dataloader(
        dataset,
        imgs_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        num_gpus=1,
        dist=False,
        shuffle=False)
    outputs = single_test(model, data_loader, args.show, save_path=args.save_path)

    # sequenceList = [2, 7, 10, 14, 16]
    sequenceList = [2, 6, 7, 8, 10, 13, 14, 16, 18]
    if args.out:
        results2kitti(dataset, outputs, args.out.replace(".pkl", ".kitti"),
                      sequenceList, args.show)
        eval_types = args.eval
        if eval_types:
            print('Starting evaluate {}'.format(' and '.join(eval_types)))
            if not isinstance(outputs[0], dict):
                result_file = args.out.replace(".pkl", ".json")
                results2json_videoseg(dataset, outputs, result_file)
                ytvos_eval(result_file, eval_types, dataset.ytvos)
            else:
                raise NotImplementedError

    if args.show:
        print("reconstruct image save folder")
        # single_test saves visualizations under folders 0..n-1; rename the
        # folders and frame files to the original KITTI sequence/frame numbering
        for sequence in tqdm(range(len(sequenceList))):
            source_path = "%s/%d" % (args.save_path, sequence)
            dest_path = "%s/%04d" % (args.save_path, sequenceList[sequence])
            cmd = "mv %s %s"
            os.system(cmd % (source_path, dest_path))
            for _, _, filenames in os.walk(dest_path):
                cmd = "mv %s/%d.png %s/%04d.png"
                for filename in filenames:
                    frame_id = int(filename[:-4])
                    os.system(cmd % (dest_path, frame_id, dest_path, frame_id))


if __name__ == '__main__':
    main()
python3 tools/test_video.py ConfigFile CheckpointPath --out output/Name.pkl
Update ConfigFile, CheckpointPath, and Name accordingly. The results are saved as [sequencenumber].txt files inside an output/Name.kitti folder (derived from the --out path).
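Each line written by results2kitti follows KITTI's tracking label layout (frame, track id, type, truncated, occluded, alpha, 2D bbox, 3D dimensions, 3D location, rotation_y, score), with the 3D fields zeroed out since only 2D boxes are predicted. A quick illustration using the same format string, with made-up values:

# the format string used in results2kitti, filled with made-up values
frame_id, instance_id, label, score = 0, 1, 'Car', 0.97
left, top, right, bottom = 296.7, 161.8, 455.2, 292.6
line = "%d %s %s 0 0 0 %f %f %f %f 0 0 0 0 0 0 %s \n" % (
    frame_id, instance_id, label, left, top, right, bottom, score)
print(line)
# 0 1 Car 0 0 0 296.700000 161.800000 455.200000 292.600000 0 0 0 0 0 0 0.97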
The modified test_video.py has some hard-coded properties to be careful about:

label_map = ['Pedestrian','Car'] (in results2kitti)
sequenceList = [2, 6, 7, 8, 10, 13, 14, 16, 18] (in main)

The predicted label is just an integer index, and label_map converts it to a class name string. The predicted video id lies in the range [0, n), and sequenceList maps it back to the original KITTI sequence number.
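The effect on instance ids can be illustrated in isolation; a minimal sketch of the renumbering done in results2kitti, with made-up (video, object) pairs:

# per-sequence object ids are merged into one global, 1-based id space
local2globalId = {}
for vid_id, obj_id in [(0, 0), (0, 1), (1, 0)]:  # made-up (video, object) pairs
    key = "%d_%d" % (vid_id, obj_id)
    if key not in local2globalId:
        local2globalId[key] = len(local2globalId) + 1
    print(key, "->", local2globalId[key])
# prints: 0_0 -> 1, 0_1 -> 2, 1_0 -> 3; object 0 of video 1 no longer
# collides with object 0 of video 0, as KITTI's evaluation expects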
visualize
IMPORTANT: When running visualization, the output labels are not rescaled to the original image size, so using these labels to evaluate tracking will not give good results. This is by design in mmdetection, to separate the visualization step from the evaluation step.
python3 tools/test_video.py ConfigFile CheckpointPath --out output/Name.pkl --show --save_path="./vis"
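If you need boxes at the original resolution from a --show run, they have to be mapped back first. A minimal sketch, assuming mmdetection's usual convention that the image meta carries the scale_factor the image was resized by (the box values are made up):

import numpy as np

def rescale_bbox(bbox, scale_factor):
    # undo the test-time resize on a [x1, y1, x2, y2, score] box
    bbox = np.asarray(bbox, dtype=np.float32)
    bbox[:4] /= scale_factor
    return bbox

print(rescale_bbox([148.0, 80.0, 228.0, 146.0, 0.9], 0.5))
# -> approximately [296., 160., 456., 292., 0.9]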
evaluate tracking
The following scripts help set up the KITTI MOT evaluation kit and facilitate evaluation.
setup_kitti_eval.sh
#!/bin/bash
eval "$(conda shell.bash hook)"
# set this to the absolute path of KITTI's label_02 folder before running
label_02_path="/home/liz220/Documents/code/MaskTrackRCNN/data/MOTS/annotations/training/label_02"
if [ -z "$label_02_path" ]
then
    echo "ERROR: You need to modify the label_02_path in this script first"
else
    wget https://s3.eu-central-1.amazonaws.com/avg-kitti/devkit_tracking.zip
    unzip devkit_tracking.zip
    rm devkit_tracking.zip
    mv devkit kitti_eval
    rm -r kitti_eval/matlab
    rm -r kitti_eval/python/data/tracking/label_02
    ln -s "$label_02_path" kitti_eval/python/data/tracking/label_02
    cp kitti_eval/python/data/tracking/evaluate_tracking.seqmap kitti_eval/python/data/tracking/evaluate_tracking.seqmap.original
    cd kitti_eval/python
    git clone https://gist.github.com/8f0a0fce563bc5af618ab196781e45dd.git
    cp 8f0a0fce563bc5af618ab196781e45dd/prepare_seqmap.py .
    rm -rf 8f0a0fce563bc5af618ab196781e45dd
    conda create -n py2 python=2.7 -y
    conda activate py2
    cd ../..
fi
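prepare_seqmap.py rewrites kitti_eval/python/data/tracking/evaluate_tracking.seqmap so that only the sequences you actually predicted are evaluated. For orientation, a devkit seqmap line names a sequence, a placeholder, and its frame range; the content looks roughly like this (the frame counts here are made up):

0002 empty 000000 000232
0006 empty 000000 000269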
evaluate_kitti.sh
#!/bin/bash
eval "$(conda shell.bash hook)"
if [ -z "$1" ] || [ -z "$2" ]
then
    echo "ERROR: You need to provide the path to the directory that contains the prediction result, and the result folder name"
elif [ -d "$1" ]
then
    echo "copying prediction result from $1 to ./kitti_eval/python/results/$2/data/"
    mkdir -p ./kitti_eval/python/results/$2/data/
    cp $1/*.txt ./kitti_eval/python/results/$2/data/
    cd ./kitti_eval/python/
    python prepare_seqmap.py ./results/$2/data/
    conda activate py2
    python evaluate_tracking.py $2
    cd ../../
else
    echo "ERROR: Not a valid directory path"
fi
visualize_kitti_failure_cases.sh
#!/bin/bash
eval "$(conda shell.bash hook)"
if [ -z "$1" ] || [ -z "$2" ]
then
    echo "ERROR: You need to provide the path to the directory that contains the prediction result, and the result folder name"
elif [ -d "$1" ]
then
    echo "copying prediction result from $1 to ./kitti_eval/python/results/$2/data/"
    mkdir -p ./kitti_eval/python/results/$2/data/
    cp $1/*.txt ./kitti_eval/python/results/$2/data/
    cd ./kitti_eval/python/
    rm -r ./results/$2/outputCases/
    mkdir -p ./results/$2/outputCases/tps
    mkdir -p ./results/$2/outputCases/fn_part1
    python prepare_seqmap.py ./results/$2/data/
    conda activate py2
    python evaluate_tracking_vis.py $2
    conda activate MaskTrackRCNN
    python construct_all_fns.py --save-dir="./results/$2/outputCases/fns" --tp-dir="./results/$2/outputCases/tps" --video-dir="./results/$2/outputCases/fns_video" --save-videos
    cd ../../
else
    echo "ERROR: Not a valid directory path"
fi
construct_all_fns.py
# Use the ground truths as a basis, exclude all cases in tp (true positive cases),
# and filter out the ignored cases (person sitting, etc.).
# The remainder closely approximates the false negative cases.
import os.path as osp
import argparse
import glob, os, cv2
import numpy as np


def mkdir_if_missing(dir):
    os.makedirs(dir, exist_ok=True)


def getNumFrames(path):
    return len(glob.glob(osp.join(path, "*.png")))


def addToDict(d, key, ele):
    tempList = d.get(key, [])
    tempList.append(ele)
    d[key] = tempList


def load_groundtruths(opt, sequenceName):
    gt_dict = {}
    with open(osp.join(opt.gt_dir, sequenceName + ".txt"), 'r', encoding='utf-8') as f:
        lines = f.readlines()
        for line in lines:
            line = line.split(' ')
            frame_id, category = int(line[0]), line[2]
            left, top, right, bottom = float(line[6]), float(line[7]), float(line[8]), float(line[9])
            if category.lower() == 'pedestrian' or category.lower() == 'car':
                addToDict(gt_dict, frame_id, [left, top, right, bottom])
    return gt_dict


def load_TPs(opt, sequenceName):
    tp_dict = {}
    with open(osp.join(opt.tp_dir, sequenceName + ".txt"), 'r', encoding='utf-8') as f:
        lines = f.readlines()
        for line in lines:
            line = line.split(' ')
            frame_id = int(line[0])
            left, top, right, bottom = float(line[1]), float(line[2]), float(line[3]), float(line[4])
            addToDict(tp_dict, frame_id, [left, top, right, bottom])
    return tp_dict


def countUnion(gt, tp):
    # despite the name, this returns the intersection over union (IoU)
    # determine the (x, y)-coordinates of the intersection rectangle
    xA = max(tp[0], gt[0])
    yA = max(tp[1], gt[1])
    xB = min(tp[2], gt[2])
    yB = min(tp[3], gt[3])
    # compute the area of the intersection rectangle
    interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)
    # compute the area of both the prediction and ground-truth rectangles
    boxArea = (tp[2] - tp[0] + 1) * (tp[3] - tp[1] + 1)
    gtArea = (gt[2] - gt[0] + 1) * (gt[3] - gt[1] + 1)
    # intersection over union: the intersection area divided by the sum of the
    # prediction and ground-truth areas minus the intersection area
    iou = interArea / float(boxArea + gtArea - interArea)
    return iou


def filterOutTps(gts, tps, numImgForSeq):
    # keep every ground-truth box that no true positive overlaps with IoU > 0.9
    fn_dict = {}
    for frame_id in range(numImgForSeq):
        for gt in gts.get(frame_id, []):
            is_fn = True
            for tp in tps.get(frame_id, []):
                if countUnion(gt, tp) > 0.9:
                    is_fn = False
                    break
            if is_fn:
                addToDict(fn_dict, frame_id, gt)
    return fn_dict


def writeToFile(save_dir, sequenceName, fns, numImgForSeq):
    with open(osp.join(save_dir, sequenceName + ".txt"), 'w', encoding='utf-8') as f:
        for frame_id in range(numImgForSeq):
            for bbox in fns.get(frame_id, []):
                f.write("%d %f %f %f %f\n" % (frame_id, bbox[0], bbox[1], bbox[2], bbox[3]))


def plot_bbox(image, bboxs, frame_id=0):
    im = np.ascontiguousarray(np.copy(image))
    text_scale = max(1, image.shape[1] / 1600.)
    line_thickness = max(1, int(image.shape[1] / 500.))
    cv2.putText(im, 'frame: %d num: %d' % (frame_id, len(bboxs)),
                (0, int(15 * text_scale)), cv2.FONT_HERSHEY_PLAIN, text_scale,
                (0, 0, 255), thickness=2)
    for i, bbox in enumerate(bboxs):
        x1, y1, x2, y2 = bbox
        intbox = tuple(map(int, (x1, y1, x2, y2)))
        color = (255, 0, 0)
        cv2.rectangle(im, intbox[0:2], intbox[2:4], color=color, thickness=line_thickness)
    return im


def save_video(video_save_folder_path, src_img_dir_path, fns, sequenceName):
    for imagePath in glob.glob(osp.join(src_img_dir_path, "*.png")):
        frame_id = int(imagePath[-10:-4])
        img0 = cv2.imread(imagePath)
        bboxs = fns.get(frame_id, [])
        online_im = plot_bbox(img0, bboxs, frame_id=frame_id)
        cv2.imwrite(osp.join(video_save_folder_path, '{:05d}.jpg'.format(frame_id)), online_im)
    # stitch the annotated frames into one video per sequence
    output_video_path = osp.join(video_save_folder_path, sequenceName + '.mp4')
    cmd_str = 'ffmpeg -i {}/%05d.jpg -f mp4 -vcodec h264 -pix_fmt yuv420p -vf "scale=trunc(iw/2)*2:trunc(ih/2)*2" {}'.format(video_save_folder_path, output_video_path)
    os.system(cmd_str)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(prog='construct_all_fns.py')
    parser.add_argument('--save-dir', type=str, help='the output directory')
    parser.add_argument('--gt-dir', type=str, default='./data/tracking/label_02', help='the path to the gt label folder')
    parser.add_argument('--tp-dir', type=str, help='the path to the true positive bboxes')
    parser.add_argument('--img-dir', type=str, default='../../data/MOTS/images/image_02')
    parser.add_argument('--video-dir', type=str)
    parser.add_argument('--save-videos', action='store_true', help='save tracking results (video)')
    opt = parser.parse_args()
    for file in glob.glob(osp.join(opt.tp_dir, "*.txt")):
        sequenceName = file[-8:-4]  # e.g. 0002
        gts = load_groundtruths(opt, sequenceName)
        tps = load_TPs(opt, sequenceName)
        numImgForSeq = getNumFrames(osp.join(opt.img_dir, sequenceName))
        fns = filterOutTps(gts, tps, numImgForSeq)
        mkdir_if_missing(opt.save_dir)
        writeToFile(opt.save_dir, sequenceName, fns, numImgForSeq)
        if opt.save_videos:
            video_save_folder_path = osp.join(opt.video_dir, sequenceName)
            mkdir_if_missing(video_save_folder_path)
            src_img_dir_path = osp.join(opt.img_dir, sequenceName)
            save_video(video_save_folder_path, src_img_dir_path, fns, sequenceName)
    print('extraction finished.')
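To make the IoU threshold in filterOutTps concrete, here is a small worked example using the countUnion helper above (the boxes are made up): even a prediction that overlaps a ground-truth box fairly well can fall below 0.9 IoU, so the ground truth would still be reported as a false negative.

gt = [100.0, 100.0, 200.0, 200.0]  # ground-truth box [left, top, right, bottom]
tp = [110.0, 105.0, 205.0, 210.0]  # a matched prediction, shifted by a few pixels
print(countUnion(gt, tp))  # ~0.75, below the 0.9 threshold, so gt is kept as a FN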
- Put the setup_kitti_eval.sh, evaluate_kitti.sh, and visualize_kitti_failure_cases.sh files in your project's home directory.
- Change label_02_path in setup_kitti_eval.sh to the correct path to KITTI's label folder.
- The scripts only work from the base environment, so run conda deactivate first if you are in another conda environment.
- Run setup_kitti_eval.sh. It sets up the kitti_eval folder and creates a Python 2 conda environment named py2.
- Download the construct_all_fns.py file and put it in kitti_eval/python/.
- Run
sh evaluate_kitti.sh path_to_prediction_result_folder FOLDERNAME
to get the evaluation result; see the end-to-end example after this list. It automatically updates the seqmap. FOLDERNAME is the name of the folder the evaluation result will be saved in.
- Run
sh visualize_kitti_failure_cases.sh path_to_prediction_result FOLDERNAME
to write failure-case videos to kitti_eval/python/results/FOLDERNAME/outputCases/fns_video, and failure labels to the kitti_eval/python/results/FOLDERNAME/outputCases/fns folder.
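Putting it all together, a typical end-to-end run might look like the following (the prediction folder and FOLDERNAME here are examples):

conda deactivate
sh setup_kitti_eval.sh
sh evaluate_kitti.sh output/Name.kitti my_experiment
sh visualize_kitti_failure_cases.sh output/Name.kitti my_experiment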