Commit d3c52550 authored by Ross Girshick

improve docstrings

parent 6525804a
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------
"""Factory method for easily getting imdbs by name."""
__sets = {}
import datasets.pascal_voc
import numpy as np
def _selective_search_IJCV_top_k(split, year, top_k):
"""Return an imdb that uses the top k proposals from the selective search
IJCV code.
"""
imdb = datasets.pascal_voc(split, year)
imdb.roidb_handler = imdb.selective_search_IJCV_roidb
imdb.config['top_k'] = top_k
......@@ -26,9 +38,11 @@ for top_k in np.arange(1000, 11000, 1000):
_selective_search_IJCV_top_k(split, year, top_k))
def get_imdb(name):
    """Get an imdb (image database) by name.

    Looks `name` up in the module-level `__sets` registry and calls the
    factory stored under that key.

    Arguments:
        name: key under which the dataset factory was registered

    Returns:
        whatever the registered factory returns (presumably an imdb
        instance -- the registration code is only partly visible here)

    Raises:
        KeyError: if nothing is registered under `name`
    """
    # `in` replaces dict.has_key(), which no longer exists in Python 3;
    # the membership test is identical on Python 2.
    if name not in __sets:
        raise KeyError('Unknown dataset: {}'.format(name))
    return __sets[name]()
def list_imdbs():
    """List all registered imdbs.

    Returns:
        list: the keys currently present in the module-level `__sets`
        registry
    """
    # list(...) yields a real list on both Python 2 and Python 3, whereas
    # dict.keys() is a view object on Python 3.
    return list(__sets)
......@@ -7,12 +7,14 @@
import os
import PIL
import utils.cython_bbox
from utils.cython_bbox import bbox_overlaps
import numpy as np
import scipy.sparse
import datasets
class imdb(object):
"""Image database."""
def __init__(self, name):
self._name = name
self._num_classes = 0
......@@ -114,9 +116,8 @@ class imdb(object):
boxes = candidate_boxes[i]
if boxes.shape[0] == 0:
continue
overlaps = \
utils.cython_bbox.bbox_overlaps(boxes.astype(np.float),
gt_boxes.astype(np.float))
overlaps = bbox_overlaps(boxes.astype(np.float),
gt_boxes.astype(np.float))
# gt_overlaps = np.hstack((gt_overlaps, overlaps.max(axis=0)))
_gt_overlaps = np.zeros((gt_boxes.shape[0]))
......@@ -157,9 +158,8 @@ class imdb(object):
if gt_roidb is not None:
gt_boxes = gt_roidb[i]['boxes']
gt_classes = gt_roidb[i]['gt_classes']
gt_overlaps = \
utils.cython_bbox.bbox_overlaps(boxes.astype(np.float),
gt_boxes.astype(np.float))
gt_overlaps = bbox_overlaps(boxes.astype(np.float),
gt_boxes.astype(np.float))
argmaxes = gt_overlaps.argmax(axis=1)
maxes = gt_overlaps.max(axis=1)
I = np.where(maxes > 0)[0]
......
......@@ -5,16 +5,16 @@
# Written by Ross Girshick
# --------------------------------------------------------
#
# README
#
# This file specifies default config options for Fast R-CNN. You should not
# change values in this file. Instead, you should write a config YAML file
# and use cfg_from_file(yaml_file) to load it and override the default options.
#
# - See tools/{train,test}_net.py for example code that uses cfg_from_file().
# - See experiments/cfgs/*.yml for example YAML config override files.
#
"""Fast R-CNN config system.
This file specifies default config options for Fast R-CNN. You should not
change values in this file. Instead, you should write a config file (in yaml)
and use cfg_from_file(yaml_file) to load it and override the default options.
Most tools in $ROOT/tools take a --cfg option to specify an override file.
- See tools/{train,test}_net.py for example code that uses cfg_from_file()
- See experiments/cfgs/*.yml for example YAML config override files
"""
import os
import os.path as osp
......@@ -130,6 +130,11 @@ __C.ROOT_DIR = osp.abspath(osp.join(osp.dirname(__file__), '..', '..'))
__C.EXP_DIR = 'default'
def get_output_dir(imdb, net):
"""Return the directory where experimental artifacts are placed.
A canonical path is built using the name from an imdb and a network
(if not None).
"""
path = osp.abspath(osp.join(__C.ROOT_DIR, 'output', __C.EXP_DIR, imdb.name))
if net is None:
return path
......@@ -137,12 +142,12 @@ def get_output_dir(imdb, net):
return osp.join(path, net.name)
def _merge_a_into_b(a, b):
"""
Merge config dictionary a into config dictionary b, clobbering the options
in b whenever they are also specified in a.
"""Merge config dictionary a into config dictionary b, clobbering the
options in b whenever they are also specified in a.
"""
if type(a) is not edict:
return
for k, v in a.iteritems():
# a must specify keys that are in b
if not b.has_key(k):
......@@ -165,10 +170,7 @@ def _merge_a_into_b(a, b):
b[k] = v
def cfg_from_file(filename):
"""
Load a config file and merge it into the default options specified in this
file.
"""
"""Load a config file and merge it into the default options."""
import yaml
with open(filename, 'r') as f:
yaml_cfg = edict(yaml.load(f))
......
......@@ -5,6 +5,8 @@
# Written by Ross Girshick
# --------------------------------------------------------
"""Test a Fast R-CNN network on an imdb (image database)."""
from fast_rcnn.config import cfg, get_output_dir
import argparse
from utils.timer import Timer
......@@ -18,6 +20,16 @@ from utils.blob import im_list_to_blob
import os
def _get_image_blob(im):
"""Converts an image into a network input.
Arguments:
im (ndarray): a color image in BGR order
Returns:
blob (ndarray): a data blob holding an image pyramid
im_scale_factors (list): list of image scales (relative to im) used
in the image pyramid
"""
im_orig = im.astype(np.float32, copy=True)
im_orig -= cfg.PIXEL_MEANS
......@@ -44,11 +56,30 @@ def _get_image_blob(im):
return blob, np.array(im_scale_factors)
def _get_rois_blob(im_rois, im_scale_factors):
rois, levels = _scale_im_rois(im_rois, im_scale_factors)
rois_blob = np.hstack((levels, rois))[:, :, np.newaxis, np.newaxis]
"""Converts RoIs into network inputs.
Arguments:
im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates
im_scale_factors (list): scale factors as returned by _get_image_blob
Returns:
blob (ndarray): R x 5 matrix of RoIs in the image pyramid
"""
rois, levels = _project_im_rois(im_rois, im_scale_factors)
rois_blob = np.hstack((levels, rois))
return rois_blob.astype(np.float32, copy=False)
def _scale_im_rois(im_rois, scales):
def _project_im_rois(im_rois, scales):
"""Project image RoIs into the image pyramid built by _get_image_blob.
Arguments:
im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates
scales (list): scale factors as returned by _get_image_blob
Returns:
rois (ndarray): R x 4 matrix of projected RoI coordinates
levels (list): image pyramid levels used by each projected RoI
"""
im_rois = im_rois.astype(np.float, copy=False)
if len(scales) > 1:
......@@ -67,12 +98,16 @@ def _scale_im_rois(im_rois, scales):
return rois, levels
def _get_blobs(im, rois):
    """Build the network input blobs for one image and its RoIs.

    Returns a tuple (blobs, im_scale_factors): `blobs` is a dict with the
    keys 'data' (from _get_image_blob) and 'rois' (from _get_rois_blob),
    and `im_scale_factors` is passed through from _get_image_blob.
    """
    # The image blob comes first because the RoI blob needs the scale
    # factors it produces.
    data_blob, im_scale_factors = _get_image_blob(im)
    blobs = {
        'data': data_blob,
        'rois': _get_rois_blob(rois, im_scale_factors),
    }
    return blobs, im_scale_factors
def _bbox_pred(boxes, box_deltas):
"""Transform the set of class-agnostic boxes into class-specific boxes
by applying the predicted offsets (box_deltas)
"""
if boxes.shape[0] == 0:
return np.zeros((0, box_deltas.shape[1]))
......@@ -105,6 +140,7 @@ def _bbox_pred(boxes, box_deltas):
return pred_boxes
def _clip_boxes(boxes, im_shape):
"""Clip boxes to image boundaries."""
# x1 >= 0
boxes[:, 0::4] = np.maximum(boxes[:, 0::4], 0)
# y1 >= 0
......@@ -116,6 +152,18 @@ def _clip_boxes(boxes, im_shape):
return boxes
def im_detect(net, im, boxes):
"""Detect object classes in an image given object proposals.
Arguments:
net (caffe.Net): Fast R-CNN network to use
im (ndarray): color image to test (in BGR order)
boxes (ndarray): R x 4 array of object proposals
Returns:
scores (ndarray): R x K array of object class scores (K includes
background as object category 0)
boxes (ndarray): R x (4*K) array of predicted bounding boxes
"""
blobs, unused_im_scale_factors = _get_blobs(im, boxes)
# When mapping from image ROIs to feature map ROIs, there's some aliasing
......@@ -124,10 +172,10 @@ def im_detect(net, im, boxes):
# on the unique subset.
if cfg.DEDUP_BOXES > 0:
v = np.array([1, 1e3, 1e6, 1e9, 1e12])
hashes = np.round(blobs['rois'][:, :, 0, 0] * cfg.DEDUP_BOXES).dot(v)
hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
_, index, inv_index = np.unique(hashes, return_index=True,
return_inverse=True)
blobs['rois'] = blobs['rois'][index, :, :, :]
blobs['rois'] = blobs['rois'][index, :]
boxes = boxes[index, :]
# reshape network inputs
......@@ -160,6 +208,7 @@ def im_detect(net, im, boxes):
return scores, pred_boxes
def _vis_detections(im, class_name, dets, thresh=0.3):
"""Visual debugging of detections."""
import matplotlib.pyplot as plt
im = im[:, :, (2, 1, 0)]
for i in xrange(np.minimum(10, dets.shape[0])):
......@@ -178,6 +227,9 @@ def _vis_detections(im, class_name, dets, thresh=0.3):
plt.pause(1)
def apply_nms(all_boxes, thresh):
"""Apply non-maximum suppression to all predicted boxes output by the
test_net method.
"""
num_classes = len(all_boxes)
num_images = len(all_boxes[0])
nms_boxes = [[[] for _ in xrange(num_images)]
......@@ -194,6 +246,7 @@ def apply_nms(all_boxes, thresh):
return nms_boxes
def test_net(net, imdb):
"""Test a Fast R-CNN network on an image database."""
num_images = len(imdb.image_index)
# heuristic: keep an average of 40 detections per class per image prior
# to NMS
......
......@@ -5,6 +5,8 @@
# Written by Ross Girshick
# --------------------------------------------------------
"""Train a Fast R-CNN network."""
import caffe
from fast_rcnn.config import cfg
import roi_data_layer.roidb as rdl_roidb
......@@ -16,8 +18,14 @@ from caffe.proto import caffe_pb2
import google.protobuf as pb2
class SolverWrapper(object):
"""A simple wrapper around Caffe's solver.
This wrapper gives us control over the snapshotting process, which we
use to unnormalize the learned bounding-box regression weights.
"""
def __init__(self, solver_prototxt, roidb, output_dir,
pretrained_model=None):
"""Initialize the SolverWrapper."""
self.output_dir = output_dir
print 'Computing bounding-box regression targets...'
......@@ -38,6 +46,9 @@ class SolverWrapper(object):
self.solver.net.layers[0].set_roidb(roidb)
def snapshot(self):
"""Take a snapshot of the network after unnormalizing the learned
bounding-box regression weights. This enables easy use at test-time.
"""
net = self.solver.net
if cfg.TRAIN.BBOX_REG:
......@@ -71,6 +82,7 @@ class SolverWrapper(object):
net.params['bbox_pred'][1].data[...] = orig_1
def train_model(self, max_iters):
"""Network training loop."""
last_snapshot_iter = -1
timer = Timer()
while self.solver.iter < max_iters:
......@@ -89,6 +101,7 @@ class SolverWrapper(object):
self.snapshot()
def get_training_roidb(imdb):
"""Returns a roidb (Region of Interest database) for use in training."""
if cfg.TRAIN.USE_FLIPPED:
print 'Appending horizontally-flipped training examples...'
imdb.append_flipped_images()
......@@ -102,6 +115,7 @@ def get_training_roidb(imdb):
def train_net(solver_prototxt, roidb, output_dir,
pretrained_model=None, max_iters=40000):
"""Train a Fast R-CNN network."""
sw = SolverWrapper(solver_prototxt, roidb, output_dir,
pretrained_model=pretrained_model)
......
......@@ -5,6 +5,11 @@
# Written by Ross Girshick
# --------------------------------------------------------
"""The data layer used during training to train a Fast R-CNN network.
RoIDataLayer implements a Caffe Python layer.
"""
import caffe
from fast_rcnn.config import cfg
from roi_data_layer.minibatch import get_minibatch
......@@ -13,13 +18,15 @@ import yaml
from multiprocessing import Process, queues
class RoIDataLayer(caffe.Layer):
"""Fast R-CNN data layer."""
"""Fast R-CNN data layer used for training."""
def _shuffle_roidb_inds(self):
"""Randomly permute the training roidb."""
self._perm = np.random.permutation(np.arange(len(self._roidb)))
self._cur = 0
def _get_next_minibatch_inds(self):
"""Return the roidb indices for the next minibatch."""
if self._cur + cfg.TRAIN.IMS_PER_BATCH >= len(self._roidb):
self._shuffle_roidb_inds()
......@@ -29,10 +36,17 @@ class RoIDataLayer(caffe.Layer):
@staticmethod
def _prefetch(minibatch_db, num_classes, output_queue):
"""Prefetch minibatch blobs (if cfg.TRAIN.USE_PREFETCH is enabled)."""
blobs = get_minibatch(minibatch_db, num_classes)
output_queue.put(blobs)
def _get_next_minibatch(self):
"""Return the blobs to be used for the next minibatch.
If cfg.TRAIN.USE_PREFETCH is True, then blobs will be computed in a
separate process and made available through the self._prefetch_queue
queue.
"""
db_inds = self._get_next_minibatch_inds()
minibatch_db = [self._roidb[i] for i in db_inds]
if cfg.TRAIN.USE_PREFETCH:
......@@ -45,12 +59,14 @@ class RoIDataLayer(caffe.Layer):
return get_minibatch(minibatch_db, self._num_classes)
def set_roidb(self, roidb):
"""Set the roidb to be used by this layer during training."""
self._roidb = roidb
self._shuffle_roidb_inds()
if cfg.TRAIN.USE_PREFETCH:
self._get_next_minibatch()
def setup(self, bottom, top):
"""Setup the RoIDataLayer."""
if cfg.TRAIN.USE_PREFETCH:
self._prefetch_process = None
self._prefetch_queue = queues.SimpleQueue()
......@@ -78,6 +94,7 @@ class RoIDataLayer(caffe.Layer):
top[4].reshape(1, self._num_classes * 4)
def forward(self, bottom, top):
"""Get blobs and copy them into this layer's top blob vector."""
if cfg.TRAIN.USE_PREFETCH:
blobs = self._prefetch_queue.get()
self._get_next_minibatch()
......
......@@ -5,6 +5,8 @@
# Written by Ross Girshick
# --------------------------------------------------------
"""Compute minibatch blobs for training a Fast R-CNN network."""
import numpy as np
import numpy.random as npr
import cv2
......@@ -12,9 +14,7 @@ from fast_rcnn.config import cfg
from utils.blob import prep_im_for_blob, im_list_to_blob
def get_minibatch(roidb, num_classes):
"""
Given a roidb, construct a minibatch sampled from it.
"""
"""Given a roidb, construct a minibatch sampled from it."""
num_images = len(roidb)
# Sample random scales to use for each image in this batch
random_scale_inds = npr.randint(0, high=len(cfg.TRAIN.SCALES),
......@@ -40,7 +40,7 @@ def get_minibatch(roidb, num_classes):
num_classes)
# Add to RoIs blob
rois = _scale_im_rois(im_rois, im_scales[im_i])
rois = _project_im_rois(im_rois, im_scales[im_i])
batch_ind = im_i * np.ones((rois.shape[0], 1))
rois_blob_this_image = np.hstack((batch_ind, rois))
rois_blob = np.vstack((rois_blob, rois_blob_this_image))
......@@ -65,8 +65,7 @@ def get_minibatch(roidb, num_classes):
return blobs
def _sample_rois(roidb, fg_rois_per_image, rois_per_image, num_classes):
"""
Generate a random sample of RoIs comprising foreground and background
"""Generate a random sample of RoIs comprising foreground and background
examples.
"""
# label = class RoI has max overlap with
......@@ -113,8 +112,8 @@ def _sample_rois(roidb, fg_rois_per_image, rois_per_image, num_classes):
return labels, overlaps, rois, bbox_targets, bbox_loss_weights
def _get_image_blob(roidb, scale_inds):
"""
Builds an input blob from the images in the roidb at the specified scales.
"""Builds an input blob from the images in the roidb at the specified
scales.
"""
num_images = len(roidb)
processed_ims = []
......@@ -134,20 +133,22 @@ def _get_image_blob(roidb, scale_inds):
return blob, im_scales
def _scale_im_rois(im_rois, im_scale_factor):
def _project_im_rois(im_rois, im_scale_factor):
    """Map RoIs from the original image onto the rescaled training image.

    The projection is a plain multiplication of the RoI coordinates by
    `im_scale_factor`.
    """
    return im_rois * im_scale_factor
def _get_bbox_regression_labels(bbox_target_data, num_classes):
"""
Bounding-box regression targets are stored in a compact form in the roidb.
"""Bounding-box regression targets are stored in a compact form in the
roidb.
This function expands those targets into the 4-of-4*K representation used
by the network (i.e. only one class has non-zero targets). The loss weights
are similarly expanded.
Returns:
(N, K * 4) blob of regression targets
(N, K * 4) blob of loss weights
bbox_target_data (ndarray): N x 4K blob of regression targets
bbox_loss_weights (ndarray): N x 4K blob of loss weights
"""
clss = bbox_target_data[:, 0]
bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
......
......@@ -5,13 +5,14 @@
# Written by Ross Girshick
# --------------------------------------------------------
"""Transform a roidb into a trainable roidb by adding a bunch of metadata."""
import numpy as np
from fast_rcnn.config import cfg
import utils.cython_bbox
def prepare_roidb(imdb):
"""
Enrich the imdb's roidb by adding some derived quantities that
"""Enrich the imdb's roidb by adding some derived quantities that
are useful for training. This function precomputes the maximum
overlap, taken over ground-truth boxes, between each ROI and
each ground-truth box. The class with maximum overlap is also
......@@ -37,6 +38,7 @@ def prepare_roidb(imdb):
assert all(max_classes[nonzero_inds] != 0)
def add_bbox_regression_targets(roidb):
"""Add information needed to train bounding-box regressors."""
assert len(roidb) > 0
assert 'max_classes' in roidb[0], 'Did you call prepare_roidb first?'
......@@ -80,6 +82,7 @@ def add_bbox_regression_targets(roidb):
return means.ravel(), stds.ravel()
def _compute_targets(rois, overlaps, labels):
"""Compute bounding-box regression targets for an image."""
# Ensure ROIs are floats
rois = rois.astype(np.float, copy=False)
......
......@@ -5,10 +5,16 @@
# Written by Ross Girshick
# --------------------------------------------------------
"""Blob helper functions."""
import numpy as np
import cv2
def im_list_to_blob(ims):
"""Convert a list of images into a network input.
Assumes images are already prepared (means subtracted, BGR order, ...).
"""
max_shape = np.array([im.shape for im in ims]).max(axis=0)
num_images = len(ims)
blob = np.zeros((num_images, max_shape[0], max_shape[1], 3),
......@@ -23,6 +29,7 @@ def im_list_to_blob(ims):
return blob
def prep_im_for_blob(im, pixel_means, target_size, max_size):
"""Mean subtract and scale an image for use in a blob."""
im = im.astype(np.float32, copy=False)
im -= pixel_means
im_shape = im.shape
......
......@@ -8,6 +8,7 @@
import time
class Timer(object):
"""A simple timer."""
def __init__(self):
self.total_time = 0.
self.calls = 0
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment