Evaluate Model

Evaluating tracking performance takes two steps: first generate predictions on the test dataset with tools/test_video.py, then run KITTI's tracking evaluation tool on the results.

get prediction results

test_video.py
import argparse

import torch
import mmcv
import cv2
import os
import numpy as np
from tqdm import tqdm
from mmcv.runner import load_checkpoint, parallel_test, obj_from_dict
from mmcv.parallel import scatter, collate, MMDataParallel

from mmdet import datasets
from mmdet.core import results2json_videoseg, ytvos_eval
from mmdet.datasets import build_dataloader
from mmdet.models import build_detector, detectors


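# single_test runs the model on every frame of the test videos without
# gradients and collects the per-frame results; with show=True it also saves
# visualizations (note that rescaling is disabled in that case, so boxes stay
# at the network input scale).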
def single_test(model, data_loader, show=False, save_path=''):
    model.eval()
    results = []
    dataset = data_loader.dataset
    prog_bar = mmcv.ProgressBar(len(dataset))
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            result = model(return_loss=False, rescale=not show, **data)
        results.append(result)

        if show:
            model.module.show_result(data, result, dataset.img_norm_cfg,
                                     dataset=dataset.CLASSES,
                                     save_vis=True,
                                     save_path=save_path,
                                     is_video=True)

        batch_size = data['img'][0].size(0)
        for _ in range(batch_size):
            prog_bar.update()
    return results


def _data_func(data, device_id):
    data = scatter(collate([data], samples_per_gpu=1), [device_id])[0]
    return dict(return_loss=False, rescale=True, **data)


def parse_args():
    parser = argparse.ArgumentParser(description='MMDet test detector')
    parser.add_argument('config', help='test config file path')
    parser.add_argument('checkpoint', help='checkpoint file')
    parser.add_argument(
        '--save_path', 
        type=str,
        help='path to save visual result')
    parser.add_argument(
        '--gpus', default=1, type=int, help='GPU number used for testing')
    parser.add_argument(
        '--proc_per_gpu',
        default=1,
        type=int,
        help='Number of processes per GPU')
    parser.add_argument('--out', help='output result file')
    parser.add_argument('--load_result', 
        action='store_true', 
        help='whether to load existing result')
    parser.add_argument(
        '--eval',
        type=str,
        nargs='+',
        choices=['bbox', 'segm'],
        help='eval types')
    parser.add_argument('--show', action='store_true', help='show results')
    args = parser.parse_args()
    return args

def mkdir_if_missing(path):
    os.makedirs(path, exist_ok=True)

def results2kitti(dataset, results, outpath, sequencesList, show):
    json_results = []
    vid_objs = {}
    local2globalId = {}
    mkdir_if_missing(outpath)
    save_dict = {}
    label_map = ['Pedestrian','Car']

    for idx in range(len(dataset)):
        # Assume results is ordered. Predicted obj_ids start from 0 for each
        # sequence, while KITTI expects instance ids that do not overlap
        # between sequences, so remap local ids to global ids.
        # vid_id runs from 0 to number of videos - 1.
        vid_id, frame_id = dataset.img_ids[idx]
        det, seg = results[idx]
        for obj_id in det:
            instance_key = "%d_%d" % (vid_id, obj_id)
            instance_id = local2globalId.get(instance_key, -1)

            if instance_id == -1:
                local2globalId[instance_key] = len(local2globalId) + 1
                instance_id = local2globalId[instance_key]

            predict = det[obj_id]['bbox']
            label = label_map[det[obj_id]['label']]
            score = predict[4]
            left, top, right, bottom = list(map(float, predict[0:-1]))
            # field order: frame, id, label, three zeros, 2D bbox, six zeros, score
            formattedStr = "%d %s %s 0 0 0 %f %f %f %f 0 0 0 0 0 0 %s \n" % (
                frame_id, instance_id, label, left, top, right, bottom, score)
            save_dict.setdefault(vid_id, []).append(formattedStr)

    for sequence in save_dict.keys():
        save_file_path = os.path.join(outpath, '%04d.txt' % (sequencesList[sequence]))
        with open(save_file_path, "w", encoding='utf-8') as dest:
            print("save labels for sequence %s to %s" % (sequencesList[sequence], save_file_path))
            for line in save_dict[sequence]:
                dest.write(line)

def main():
    args = parse_args()

    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
        raise ValueError('The output file must be a pkl file.')

    cfg = mmcv.Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    dataset = obj_from_dict(cfg.data.test, datasets, dict(test_mode=True))
    assert args.gpus == 1
    model = build_detector(
        cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
    load_checkpoint(model, args.checkpoint)
    model = MMDataParallel(model, device_ids=[0])

    data_loader = build_dataloader(
        dataset,
        imgs_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        num_gpus=1,
        dist=False,
        shuffle=False)

    outputs = single_test(model, data_loader, args.show, save_path=args.save_path)

    # sequenceList = [2, 7, 10, 14, 16]
    sequenceList = [2, 6, 7, 8, 10, 13, 14, 16, 18]
    if args.out:
        results2kitti(dataset,outputs,args.out.replace(".pkl",".kitti"),sequenceList,args.show)

        eval_types = args.eval
        if eval_types:
            print('Starting to evaluate {}'.format(' and '.join(eval_types)))
            if not isinstance(outputs[0], dict):
                result_file = args.out.replace(".pkl",".json")
                results2json_videoseg(dataset, outputs, result_file)
                ytvos_eval(result_file, eval_types, dataset.ytvos)
            else:
                raise NotImplementedError

    if args.show:
        # Rename the visualization folders and files from 0-based video
        # indices back to the original KITTI sequence numbers (e.g. 0 -> 0002).
        print("reconstruct image save folder")
        for sequence in tqdm(range(len(sequenceList))):
            source_path = "%s/%d" % (args.save_path, sequence)
            dest_path = "%s/%04d" % (args.save_path, sequenceList[sequence])
            os.system("mv %s %s" % (source_path, dest_path))
            for _, _, filenames in os.walk(dest_path):
                for filename in filenames:
                    frame_id = int(filename[:-4])
                    os.system("mv %s/%d.png %s/%04d.png" % (dest_path, frame_id, dest_path, frame_id))



if __name__ == '__main__':
    main()


python3 tools/test_video.py ConfigFile CheckpointPath --out output/Name.pkl

Update ConfigFile, CheckpointPath, and Name accordingly. The KITTI-format results are saved under the output/Name.kitti/ folder, with one [sequencenumber].txt file per sequence.

The modified test_video.py contains some hard-coded values to be aware of:

label_map = ['Pedestrian','Car']
sequenceList = [2, 6, 7, 8, 10, 13, 14, 16, 18]

The predicted label is just an integer index, and label_map maps it to a class string. The predicted video id lies in the range [0, n), and sequenceList maps it back to the original KITTI sequence number.
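For reference, a line written by results2kitti looks like the following (values illustrative, not real output). The field order follows the format string in the code: frame, track id, object type, then the 2D bounding box as left/top/right/bottom, with the confidence score last; the zeroed fields are the parts of a KITTI label this 2D tracker does not predict (truncation, occlusion, alpha, and the 3D box).

0 1 Car 0 0 0 712.40 143.00 810.73 307.92 0 0 0 0 0 0 0.98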

visualize

IMPORTANT: When running visualization, the output labels are not rescaled to the original image size (single_test passes rescale=not show), so using these labels to evaluate tracking will not give good results. This is by design in mmdetection, to separate the visualization step from the evaluation step.

python3 tools/test_video.py ConfigFile CheckpointPath --out output/Name.pkl --show --save_path="./vis"

evaluate tracking

The following scripts help set up the KITTI MOT evaluation toolkit and run the evaluation.

setup_kitti_eval.sh
#!/bin/bash
eval "$(conda shell.bash hook)"
label_02_path="/home/liz220/Documents/code/MaskTrackRCNN/data/MOTS/annotations/training/label_02"

if [ -z "$label_02_path" ]
then 
    echo "ERROR: You need to modify the label_02_path in this script first"
else
    wget https://s3.eu-central-1.amazonaws.com/avg-kitti/devkit_tracking.zip
    unzip devkit_tracking.zip
    rm devkit_tracking.zip
    mv devkit kitti_eval
    rm -r kitti_eval/matlab
    rm -r kitti_eval/python/data/tracking/label_02
    ln -s "$label_02_path" kitti_eval/python/data/tracking/label_02
    cp kitti_eval/python/data/tracking/evaluate_tracking.seqmap kitti_eval/python/data/tracking/evaluate_tracking.seqmap.original
    cd kitti_eval/python
    git clone https://gist.github.com/8f0a0fce563bc5af618ab196781e45dd.git
    cp 8f0a0fce563bc5af618ab196781e45dd/prepare_seqmap.py .
    rm -rf 8f0a0fce563bc5af618ab196781e45dd
    conda create -n py2 python=2.7 -y
    conda activate py2
    cd ../..
fi
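For reference, the devkit's evaluate_tracking.seqmap lists the sequences to evaluate, and prepare_seqmap.py (fetched from the gist above) rewrites it to cover only the sequences present in your prediction folder. Each line names a sequence and its frame range, roughly like this illustrative entry:

0002 empty 000000 000233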
evaluate_kitti.sh
#!/bin/bash
eval "$(conda shell.bash hook)"
if [ -z "$1" ] || [ -z "$2" ]
then
    echo "ERROR: You need to provide the path to the directory that contains the prediction results, and a name for the results folder"
elif [ -d "$1" ]
then
    echo "copying prediction result from $1 to ./kitti_eval/python/results/$2/data/"
    mkdir -p ./kitti_eval/python/results/$2/data/
    cp $1/*.txt ./kitti_eval/python/results/$2/data/
    cd ./kitti_eval/python/
    python prepare_seqmap.py ./results/$2/data/
    conda activate py2
    python evaluate_tracking.py $2
    cd ../../
else 
    echo "ERROR: Not a valid directory path"
fi
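For example, assuming the predictions from test_video.py landed in output/Name.kitti and you want the evaluation stored under a results folder named myrun (both names illustrative):

sh evaluate_kitti.sh output/Name.kitti myrun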
visualize_kitti_failure_cases.sh
#!/bin/bash
eval "$(conda shell.bash hook)"
if [ -z "$1" ] || [ -z "$2" ]
then
    echo "ERROR: You need to provide the path to the directory that contains the prediction results, and a name for the results folder"
elif [ -d "$1" ]
then
    echo "copying prediction result from $1 to ./kitti_eval/python/results/$2/data/"
    mkdir -p ./kitti_eval/python/results/$2/data/
    cp $1/*.txt ./kitti_eval/python/results/$2/data/
    cd ./kitti_eval/python/
    rm -rf ./results/$2/outputCases/
    mkdir -p ./results/$2/outputCases/tps
    mkdir -p ./results/$2/outputCases/fn_part1
    python prepare_seqmap.py ./results/$2/data/
    conda activate py2
    python evaluate_tracking_vis.py $2
    conda activate MaskTrackRCNN
    python construct_all_fns.py --save-dir="./results/$2/outputCases/fns" --tp-dir="./results/$2/outputCases/tps" --video-dir="./results/$2/outputCases/fns_video" --save-videos
    cd ../../
else 
    echo "ERROR: Not a valid directory path"
fi
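Usage mirrors evaluate_kitti.sh, e.g. with the same illustrative names:

sh visualize_kitti_failure_cases.sh output/Name.kitti myrun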
construct_all_fns.py
# Use the ground truths as a basis, exclude all true positive cases,
# and filter out the ignored cases (person sitting, etc.);
# the result should closely approximate the false negative cases.
import os.path as osp
import argparse 
import glob, os, cv2
import numpy as np

def mkdir_if_missing(path):
    os.makedirs(path, exist_ok=True)

def getNumFrames(path):
    return len(glob.glob(osp.join(path, "*.png")))

def addToDict(d, key, ele):
    tempList = d.get(key, [])
    tempList.append(ele)
    d[key] = tempList

def load_groundtruths(opt,sequenceName):
    gt_dict = {}
    with open( osp.join(opt.gt_dir,sequenceName+".txt"),'r',encoding = 'utf-8') as f:
        lines = f.readlines()
        for line in lines:
            line = line.split(' ')
            frame_id , category, left, top, right, bottom = int(line[0]), line[2], float(line[6]), float(line[7]), float(line[8]), float(line[9])

            if(category.lower() == 'pedestrian' or category.lower() == 'car'):
                addToDict(gt_dict,frame_id,[left,top,right,bottom])
    return gt_dict

def load_TPs(opt,sequenceName):
    tp_dict = {}
    with open( osp.join(opt.tp_dir,sequenceName+".txt"),'r',encoding = 'utf-8') as f:
        lines = f.readlines()
        for line in lines:
            line = line.split(' ')
            frame_id , left, top, right, bottom = int(line[0]), float(line[1]), float(line[2]), float(line[3]), float(line[4])

            addToDict(tp_dict, frame_id, [left,top,right,bottom])
    return tp_dict

def computeIoU(gt, tp):
    # determine the (x, y)-coordinates of the intersection rectangle
    xA = max(tp[0], gt[0])
    yA = max(tp[1], gt[1])
    xB = min(tp[2], gt[2])
    yB = min(tp[3], gt[3])
    # compute the area of the intersection rectangle
    interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)
    # compute the area of both the prediction and ground-truth rectangles
    boxArea = (tp[2] - tp[0] + 1) * (tp[3] - tp[1] + 1)
    gtArea = (gt[2] - gt[0] + 1) * (gt[3] - gt[1] + 1)
    # intersection over union: intersection area divided by the union
    # (sum of both areas minus the intersection area)
    iou = interArea / float(boxArea + gtArea - interArea)
    return iou
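
# Quick sanity check (illustrative): identical boxes give IoU 1.0 and
# disjoint boxes give 0.0, e.g.
#   computeIoU([0, 0, 9, 9], [0, 0, 9, 9])     -> 1.0
#   computeIoU([0, 0, 9, 9], [20, 20, 29, 29]) -> 0.0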

def filterOutTps(gts,tps,numImgForSeq):
    fn_dict = {}
    for frame_id in range(numImgForSeq):
        for gt in gts.get(frame_id,[]):
            is_fn = True
            for tp in tps.get(frame_id,[]):
                if computeIoU(gt, tp) > 0.9:  # overlaps a true positive, so not a false negative
                    is_fn = False
                    break
            if is_fn:
                addToDict(fn_dict,frame_id, gt)
    return fn_dict

def writeToFile(save_dir,sequenceName, fns, numImgForSeq):
    with open(osp.join(save_dir,sequenceName+".txt"), 'w',encoding='utf-8') as f:
        for frame_id in range(numImgForSeq):
            for bbox in fns.get(frame_id,[]):
                    f.write("%d %f %f %f %f\n"%(frame_id, bbox[0], bbox[1], bbox[2], bbox[3]))

def plot_bbox(image, bboxs, frame_id=0):
    im = np.ascontiguousarray(np.copy(image))

    text_scale = max(1, image.shape[1] / 1600.)
    line_thickness = max(1, int(image.shape[1] / 500.))

    cv2.putText(im, 'frame: %d num: %d' % (frame_id, len(bboxs)),
                (0, int(15 * text_scale)), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 0, 255), thickness=2)

    for bbox in bboxs:
        x1, y1, x2, y2 = bbox
        intbox = tuple(map(int, (x1, y1, x2, y2)))
        color = (255, 0, 0)
        cv2.rectangle(im, intbox[0:2], intbox[2:4], color=color, thickness=line_thickness)
    return im


def save_video(video_save_folder_path, src_img_dir_path, fns, sequenceName):
    output_video_path = osp.join(video_save_folder_path, sequenceName + '.mp4')
    for imagePath in glob.glob(osp.join(src_img_dir_path, "*.png")):
        frame_id = int(imagePath[-10:-4])  # KITTI frames are named %06d.png
        img0 = cv2.imread(imagePath)
        bboxs = fns.get(frame_id, [])

        online_im = plot_bbox(img0, bboxs, frame_id=frame_id)
        cv2.imwrite(osp.join(video_save_folder_path, '{:05d}.jpg'.format(frame_id)), online_im)

    # the scale filter rounds width/height down to even numbers, which h264 requires
    cmd_str = 'ffmpeg -i {}/%05d.jpg -f mp4 -vcodec h264 -pix_fmt yuv420p -vf "scale=trunc(iw/2)*2:trunc(ih/2)*2" {}'.format(video_save_folder_path, output_video_path)
    os.system(cmd_str)

if __name__ == '__main__':
    parser = argparse.ArgumentParser(prog='construct_all_fns.py')
    parser.add_argument('--save-dir', type=str, help='the output directory')
    parser.add_argument('--gt-dir', type=str, default='./data/tracking/label_02', help='the path to gt label folder')
    parser.add_argument('--tp-dir', type=str, help='the path to the true positive bboxs')
    parser.add_argument('--img-dir', type =str, default='../../data/MOTS/images/image_02')
    parser.add_argument('--video-dir',type=str)
    parser.add_argument('--save-videos', action='store_true', help='save tracking results (video)')
    opt = parser.parse_args()

    for file in glob.glob(osp.join(opt.tp_dir,"*.txt")):
        sequenceName = file[-8:-4] # ex. 0002
        gts = load_groundtruths(opt,sequenceName)
        tps = load_TPs(opt,sequenceName)
        numImgForSeq = getNumFrames(osp.join(opt.img_dir,sequenceName))
        fns = filterOutTps(gts,tps,numImgForSeq)
        mkdir_if_missing(opt.save_dir)
        writeToFile(opt.save_dir, sequenceName, fns, numImgForSeq)
        if opt.save_videos:
            video_save_folder_path = osp.join(opt.video_dir, sequenceName)
            mkdir_if_missing(video_save_folder_path)
            src_img_dir_path = osp.join(opt.img_dir,sequenceName)
            save_video(video_save_folder_path, src_img_dir_path,fns,sequenceName)
    print('extraction finished.')

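construct_all_fns.py is normally invoked by visualize_kitti_failure_cases.sh, but it can also be run standalone from kitti_eval/python/ once the true-positive files exist; the flags below mirror the call in that script (folder names illustrative):

python construct_all_fns.py --save-dir=./results/myrun/outputCases/fns --tp-dir=./results/myrun/outputCases/tps --video-dir=./results/myrun/outputCases/fns_video --save-videos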

  1. Put the setup_kitti_eval.sh, evaluate_kitti.sh, and visualize_kitti_failure_cases.sh files in your project's home directory.
  2. Change label_02_path in setup_kitti_eval.sh to the correct path to KITTI's label folder.
  3. The scripts only work from the base conda environment, so run conda deactivate first if you are in another conda environment.
  4. Run setup_kitti_eval.sh. It will set up the kitti_eval folder and also create a Python 2 conda environment named py2.
  5. Download the construct_all_fns.py file and put it in kitti_eval/python/.
  6. Run sh evaluate_kitti.sh path_to_prediction_result_folder FOLDERNAME to get the evaluation result (see the usage example after evaluate_kitti.sh above). It will automatically update the seqmap. FOLDERNAME is the name of the folder the evaluation will be saved in.
  7. Run sh visualize_kitti_failure_cases.sh path_to_prediction_result FOLDERNAME to write failure-case videos to kitti_eval/python/results/FOLDERNAME/outputCases/fns_video and failure labels to the kitti_eval/python/results/FOLDERNAME/outputCases/fns folder.