Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Zahra Rajabi
py-faster-rcnn
Commits
d3c52550
Commit
d3c52550
authored
Apr 27, 2015
by
Ross Girshick
Browse files
improve docstrings
parent
6525804a
Changes
10
Show whitespace changes
Inline
Side-by-side
lib/datasets/factory.py
View file @
d3c52550
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------
"""Factory method for easily getting imdbs by name."""
__sets
=
{}
import
datasets.pascal_voc
import
numpy
as
np
def
_selective_search_IJCV_top_k
(
split
,
year
,
top_k
):
"""Return an imdb that uses the top k proposals from the selective search
IJCV code.
"""
imdb
=
datasets
.
pascal_voc
(
split
,
year
)
imdb
.
roidb_handler
=
imdb
.
selective_search_IJCV_roidb
imdb
.
config
[
'top_k'
]
=
top_k
...
...
@@ -26,9 +38,11 @@ for top_k in np.arange(1000, 11000, 1000):
_selective_search_IJCV_top_k
(
split
,
year
,
top_k
))
def get_imdb(name):
    """Get an imdb (image database) by name.

    Arguments:
        name: key under which a dataset factory is stored in the
            module-level ``__sets`` table

    Returns:
        the result of calling the registered factory with no arguments

    Raises:
        KeyError: if nothing is registered under ``name``
    """
    # Use the `in` operator instead of dict.has_key(): has_key() was removed
    # in Python 3, while `in` behaves identically on Python 2.
    if name not in __sets:
        raise KeyError('Unknown dataset: {}'.format(name))
    return __sets[name]()
def list_imdbs():
    """List all registered imdbs.

    Returns:
        list: the keys of the module-level ``__sets`` table
    """
    # Build an explicit list: dict.keys() is already a list on Python 2 but
    # is a live view of the registry on Python 3. list() gives callers the
    # same kind of object on both.
    return list(__sets)
lib/datasets/imdb.py
View file @
d3c52550
...
...
@@ -7,12 +7,14 @@
import
os
import
PIL
import
utils.cython_bbox
from
utils.cython_bbox
import
bbox_overlaps
import
numpy
as
np
import
scipy.sparse
import
datasets
class
imdb
(
object
):
"""Image database."""
def
__init__
(
self
,
name
):
self
.
_name
=
name
self
.
_num_classes
=
0
...
...
@@ -114,8 +116,7 @@ class imdb(object):
boxes
=
candidate_boxes
[
i
]
if
boxes
.
shape
[
0
]
==
0
:
continue
overlaps
=
\
utils
.
cython_bbox
.
bbox_overlaps
(
boxes
.
astype
(
np
.
float
),
overlaps
=
bbox_overlaps
(
boxes
.
astype
(
np
.
float
),
gt_boxes
.
astype
(
np
.
float
))
# gt_overlaps = np.hstack((gt_overlaps, overlaps.max(axis=0)))
...
...
@@ -157,8 +158,7 @@ class imdb(object):
if
gt_roidb
is
not
None
:
gt_boxes
=
gt_roidb
[
i
][
'boxes'
]
gt_classes
=
gt_roidb
[
i
][
'gt_classes'
]
gt_overlaps
=
\
utils
.
cython_bbox
.
bbox_overlaps
(
boxes
.
astype
(
np
.
float
),
gt_overlaps
=
bbox_overlaps
(
boxes
.
astype
(
np
.
float
),
gt_boxes
.
astype
(
np
.
float
))
argmaxes
=
gt_overlaps
.
argmax
(
axis
=
1
)
maxes
=
gt_overlaps
.
max
(
axis
=
1
)
...
...
lib/fast_rcnn/config.py
View file @
d3c52550
...
...
@@ -5,16 +5,16 @@
# Written by Ross Girshick
# --------------------------------------------------------
#
# README
#
# This file specifies default config options for Fast R-CNN. You should not
# change values in this file. Instead, you should write a config YAML file
# and use cfg_from_file(yaml_file) to load it and override the default options.
#
#   - See tools/{train,test}_net.py for example code that uses cfg_from_file().
#   - See experiments/cfgs/*.yml for example YAML config override files.
#
"""Fast R-CNN config system.
This file specifies default config options for Fast R-CNN. You should not
change values in this file. Instead, you should write a config file (in yaml)
and use cfg_from_file(yaml_file) to load it and override the default options.
Most tools in $ROOT/tools take a --cfg option to specify an override file.
- See tools/{train,test}_net.py for example code that uses cfg_from_file()
- See experiments/cfgs/*.yml for example YAML config override files
"""
import
os
import
os.path
as
osp
...
...
@@ -130,6 +130,11 @@ __C.ROOT_DIR = osp.abspath(osp.join(osp.dirname(__file__), '..', '..'))
__C
.
EXP_DIR
=
'default'
def
get_output_dir
(
imdb
,
net
):
"""Return the directory where experimental artifacts are placed.
A canonical path is built using the name from an imdb and a network
(if not None).
"""
path
=
osp
.
abspath
(
osp
.
join
(
__C
.
ROOT_DIR
,
'output'
,
__C
.
EXP_DIR
,
imdb
.
name
))
if
net
is
None
:
return
path
...
...
@@ -137,12 +142,12 @@ def get_output_dir(imdb, net):
return
osp
.
join
(
path
,
net
.
name
)
def
_merge_a_into_b
(
a
,
b
):
"""
Merge config dictionary a into config dictionary b, clobbering the options
in b whenever they are also specified in a.
"""Merge config dictionary a into config dictionary b, clobbering the
options in b whenever they are also specified in a.
"""
if
type
(
a
)
is
not
edict
:
return
for
k
,
v
in
a
.
iteritems
():
# a must specify keys that are in b
if
not
b
.
has_key
(
k
):
...
...
@@ -165,10 +170,7 @@ def _merge_a_into_b(a, b):
b
[
k
]
=
v
def
cfg_from_file
(
filename
):
"""
Load a config file and merge it into the default options specified in this
file.
"""
"""Load a config file and merge it into the default options."""
import
yaml
with
open
(
filename
,
'r'
)
as
f
:
yaml_cfg
=
edict
(
yaml
.
load
(
f
))
...
...
lib/fast_rcnn/test.py
View file @
d3c52550
...
...
@@ -5,6 +5,8 @@
# Written by Ross Girshick
# --------------------------------------------------------
"""Test a Fast R-CNN network on an imdb (image database)."""
from
fast_rcnn.config
import
cfg
,
get_output_dir
import
argparse
from
utils.timer
import
Timer
...
...
@@ -18,6 +20,16 @@ from utils.blob import im_list_to_blob
import
os
def
_get_image_blob
(
im
):
"""Converts an image into a network input.
Arguments:
im (ndarray): a color image in BGR order
Returns:
blob (ndarray): a data blob holding an image pyramid
im_scale_factors (list): list of image scales (relative to im) used
in the image pyramid
"""
im_orig
=
im
.
astype
(
np
.
float32
,
copy
=
True
)
im_orig
-=
cfg
.
PIXEL_MEANS
...
...
@@ -44,11 +56,30 @@ def _get_image_blob(im):
return
blob
,
np
.
array
(
im_scale_factors
)
def _get_rois_blob(im_rois, im_scale_factors):
    """Convert RoIs into a network input blob.

    Arguments:
        im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates
        im_scale_factors (list): scale factors as returned by _get_image_blob

    Returns:
        blob (ndarray): R x 5 matrix of RoIs in the image pyramid, with the
            levels stacked in front of the projected coordinates
    """
    projected_rois, pyramid_levels = _project_im_rois(im_rois,
                                                      im_scale_factors)
    stacked = np.hstack((pyramid_levels, projected_rois))
    # copy=False lets astype skip the copy when the data is already float32.
    return stacked.astype(np.float32, copy=False)
def
_scale_im_rois
(
im_rois
,
scales
):
def
_project_im_rois
(
im_rois
,
scales
):
"""Project image RoIs into the image pyramid built by _get_image_blob.
Arguments:
im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates
scales (list): scale factors as returned by _get_image_blob
Returns:
rois (ndarray): R x 4 matrix of projected RoI coordinates
levels (list): image pyramid levels used by each projected RoI
"""
im_rois
=
im_rois
.
astype
(
np
.
float
,
copy
=
False
)
if
len
(
scales
)
>
1
:
...
...
@@ -67,12 +98,16 @@ def _scale_im_rois(im_rois, scales):
return
rois
,
levels
def _get_blobs(im, rois):
    """Build the network inputs for one image and the RoIs inside it.

    Returns a tuple ``(blobs, im_scale_factors)``. ``blobs`` is a dict whose
    'data' entry comes from _get_image_blob and whose 'rois' entry comes from
    _get_rois_blob; ``im_scale_factors`` is passed through from
    _get_image_blob.
    """
    data_blob, im_scale_factors = _get_image_blob(im)
    rois_blob = _get_rois_blob(rois, im_scale_factors)
    blobs = {'data': data_blob, 'rois': rois_blob}
    return blobs, im_scale_factors
def
_bbox_pred
(
boxes
,
box_deltas
):
"""Transform the set of class-agnostic boxes into class-specific boxes
by applying the predicted offsets (box_deltas)
"""
if
boxes
.
shape
[
0
]
==
0
:
return
np
.
zeros
((
0
,
box_deltas
.
shape
[
1
]))
...
...
@@ -105,6 +140,7 @@ def _bbox_pred(boxes, box_deltas):
return
pred_boxes
def
_clip_boxes
(
boxes
,
im_shape
):
"""Clip boxes to image boundaries."""
# x1 >= 0
boxes
[:,
0
::
4
]
=
np
.
maximum
(
boxes
[:,
0
::
4
],
0
)
# y1 >= 0
...
...
@@ -116,6 +152,18 @@ def _clip_boxes(boxes, im_shape):
return
boxes
def
im_detect
(
net
,
im
,
boxes
):
"""Detect object classes in an image given object proposals.
Arguments:
net (caffe.Net): Fast R-CNN network to use
im (ndarray): color image to test (in BGR order)
boxes (ndarray): R x 4 array of object proposals
Returns:
scores (ndarray): R x K array of object class scores (K includes
background as object category 0)
boxes (ndarray): R x (4*K) array of predicted bounding boxes
"""
blobs
,
unused_im_scale_factors
=
_get_blobs
(
im
,
boxes
)
# When mapping from image ROIs to feature map ROIs, there's some aliasing
...
...
@@ -124,10 +172,10 @@ def im_detect(net, im, boxes):
# on the unique subset.
if
cfg
.
DEDUP_BOXES
>
0
:
v
=
np
.
array
([
1
,
1e3
,
1e6
,
1e9
,
1e12
])
hashes
=
np
.
round
(
blobs
[
'rois'
]
[:,
:,
0
,
0
]
*
cfg
.
DEDUP_BOXES
).
dot
(
v
)
hashes
=
np
.
round
(
blobs
[
'rois'
]
*
cfg
.
DEDUP_BOXES
).
dot
(
v
)
_
,
index
,
inv_index
=
np
.
unique
(
hashes
,
return_index
=
True
,
return_inverse
=
True
)
blobs
[
'rois'
]
=
blobs
[
'rois'
][
index
,
:,
:,
:]
blobs
[
'rois'
]
=
blobs
[
'rois'
][
index
,
:]
boxes
=
boxes
[
index
,
:]
# reshape network inputs
...
...
@@ -160,6 +208,7 @@ def im_detect(net, im, boxes):
return
scores
,
pred_boxes
def
_vis_detections
(
im
,
class_name
,
dets
,
thresh
=
0.3
):
"""Visual debugging of detections."""
import
matplotlib.pyplot
as
plt
im
=
im
[:,
:,
(
2
,
1
,
0
)]
for
i
in
xrange
(
np
.
minimum
(
10
,
dets
.
shape
[
0
])):
...
...
@@ -178,6 +227,9 @@ def _vis_detections(im, class_name, dets, thresh=0.3):
plt
.
pause
(
1
)
def
apply_nms
(
all_boxes
,
thresh
):
"""Apply non-maximum suppression to all predicted boxes output by the
test_net method.
"""
num_classes
=
len
(
all_boxes
)
num_images
=
len
(
all_boxes
[
0
])
nms_boxes
=
[[[]
for
_
in
xrange
(
num_images
)]
...
...
@@ -194,6 +246,7 @@ def apply_nms(all_boxes, thresh):
return
nms_boxes
def
test_net
(
net
,
imdb
):
"""Test a Fast R-CNN network on an image database."""
num_images
=
len
(
imdb
.
image_index
)
# heuristic: keep an average of 40 detections per class per images prior
# to NMS
...
...
lib/fast_rcnn/train.py
View file @
d3c52550
...
...
@@ -5,6 +5,8 @@
# Written by Ross Girshick
# --------------------------------------------------------
"""Train a Fast R-CNN network."""
import
caffe
from
fast_rcnn.config
import
cfg
import
roi_data_layer.roidb
as
rdl_roidb
...
...
@@ -16,8 +18,14 @@ from caffe.proto import caffe_pb2
import
google.protobuf
as
pb2
class
SolverWrapper
(
object
):
"""A simple wrapper around Caffe's solver.
This wrapper gives us control over the snapshotting process, which we
use to unnormalize the learned bounding-box regression weights.
"""
def
__init__
(
self
,
solver_prototxt
,
roidb
,
output_dir
,
pretrained_model
=
None
):
"""Initialize the SolverWrapper."""
self
.
output_dir
=
output_dir
print
'Computing bounding-box regression targets...'
...
...
@@ -38,6 +46,9 @@ class SolverWrapper(object):
self
.
solver
.
net
.
layers
[
0
].
set_roidb
(
roidb
)
def
snapshot
(
self
):
"""Take a snapshot of the network after unnormalizing the learned
bounding-box regression weights. This enables easy use at test-time.
"""
net
=
self
.
solver
.
net
if
cfg
.
TRAIN
.
BBOX_REG
:
...
...
@@ -71,6 +82,7 @@ class SolverWrapper(object):
net
.
params
[
'bbox_pred'
][
1
].
data
[...]
=
orig_1
def
train_model
(
self
,
max_iters
):
"""Network training loop."""
last_snapshot_iter
=
-
1
timer
=
Timer
()
while
self
.
solver
.
iter
<
max_iters
:
...
...
@@ -89,6 +101,7 @@ class SolverWrapper(object):
self
.
snapshot
()
def
get_training_roidb
(
imdb
):
"""Returns a roidb (Region of Interest database) for use in training."""
if
cfg
.
TRAIN
.
USE_FLIPPED
:
print
'Appending horizontally-flipped training examples...'
imdb
.
append_flipped_images
()
...
...
@@ -102,6 +115,7 @@ def get_training_roidb(imdb):
def
train_net
(
solver_prototxt
,
roidb
,
output_dir
,
pretrained_model
=
None
,
max_iters
=
40000
):
"""Train a Fast R-CNN network."""
sw
=
SolverWrapper
(
solver_prototxt
,
roidb
,
output_dir
,
pretrained_model
=
pretrained_model
)
...
...
lib/roi_data_layer/layer.py
View file @
d3c52550
...
...
@@ -5,6 +5,11 @@
# Written by Ross Girshick
# --------------------------------------------------------
"""The data layer used during training to train a Fast R-CNN network.
RoIDataLayer implements a Caffe Python layer.
"""
import
caffe
from
fast_rcnn.config
import
cfg
from
roi_data_layer.minibatch
import
get_minibatch
...
...
@@ -13,13 +18,15 @@ import yaml
from
multiprocessing
import
Process
,
queues
class
RoIDataLayer
(
caffe
.
Layer
):
"""Fast R-CNN data layer."""
"""Fast R-CNN data layer used for training."""
def
_shuffle_roidb_inds
(
self
):
"""Randomly permute the training roidb."""
self
.
_perm
=
np
.
random
.
permutation
(
np
.
arange
(
len
(
self
.
_roidb
)))
self
.
_cur
=
0
def
_get_next_minibatch_inds
(
self
):
"""Return the roidb indices for the next minibatch."""
if
self
.
_cur
+
cfg
.
TRAIN
.
IMS_PER_BATCH
>=
len
(
self
.
_roidb
):
self
.
_shuffle_roidb_inds
()
...
...
@@ -29,10 +36,17 @@ class RoIDataLayer(caffe.Layer):
@staticmethod
def _prefetch(minibatch_db, num_classes, output_queue):
    """Compute the blobs for one minibatch and put them on ``output_queue``.

    Used for prefetching (when cfg.TRAIN.USE_PREFETCH is enabled).
    """
    output_queue.put(get_minibatch(minibatch_db, num_classes))
def
_get_next_minibatch
(
self
):
"""Return the blobs to be used for the next minibatch.
If cfg.TRAIN.USE_PREFETCH is True, then blobs will be computed in a
separate process and made available through the self._prefetch_queue
queue.
"""
db_inds
=
self
.
_get_next_minibatch_inds
()
minibatch_db
=
[
self
.
_roidb
[
i
]
for
i
in
db_inds
]
if
cfg
.
TRAIN
.
USE_PREFETCH
:
...
...
@@ -45,12 +59,14 @@ class RoIDataLayer(caffe.Layer):
return
get_minibatch
(
minibatch_db
,
self
.
_num_classes
)
def set_roidb(self, roidb):
    """Install the roidb this layer uses during training.

    Stores ``roidb``, reshuffles the index permutation, and, when
    cfg.TRAIN.USE_PREFETCH is set, immediately calls _get_next_minibatch().
    """
    self._roidb = roidb
    self._shuffle_roidb_inds()
    if not cfg.TRAIN.USE_PREFETCH:
        return
    # NOTE(review): presumably this starts preparing the first prefetched
    # minibatch -- confirm against _get_next_minibatch.
    self._get_next_minibatch()
def
setup
(
self
,
bottom
,
top
):
"""Setup the RoIDataLayer."""
if
cfg
.
TRAIN
.
USE_PREFETCH
:
self
.
_prefetch_process
=
None
self
.
_prefetch_queue
=
queues
.
SimpleQueue
()
...
...
@@ -78,6 +94,7 @@ class RoIDataLayer(caffe.Layer):
top
[
4
].
reshape
(
1
,
self
.
_num_classes
*
4
)
def
forward
(
self
,
bottom
,
top
):
"""Get blobs and copy them into this layer's top blob vector."""
if
cfg
.
TRAIN
.
USE_PREFETCH
:
blobs
=
self
.
_prefetch_queue
.
get
()
self
.
_get_next_minibatch
()
...
...
lib/roi_data_layer/minibatch.py
View file @
d3c52550
...
...
@@ -5,6 +5,8 @@
# Written by Ross Girshick
# --------------------------------------------------------
"""Compute minibatch blobs for training a Fast R-CNN network."""
import
numpy
as
np
import
numpy.random
as
npr
import
cv2
...
...
@@ -12,9 +14,7 @@ from fast_rcnn.config import cfg
from
utils.blob
import
prep_im_for_blob
,
im_list_to_blob
def
get_minibatch
(
roidb
,
num_classes
):
"""
Given a roidb, construct a minibatch sampled from it.
"""
"""Given a roidb, construct a minibatch sampled from it."""
num_images
=
len
(
roidb
)
# Sample random scales to use for each image in this batch
random_scale_inds
=
npr
.
randint
(
0
,
high
=
len
(
cfg
.
TRAIN
.
SCALES
),
...
...
@@ -40,7 +40,7 @@ def get_minibatch(roidb, num_classes):
num_classes
)
# Add to RoIs blob
rois
=
_
scale
_im_rois
(
im_rois
,
im_scales
[
im_i
])
rois
=
_
project
_im_rois
(
im_rois
,
im_scales
[
im_i
])
batch_ind
=
im_i
*
np
.
ones
((
rois
.
shape
[
0
],
1
))
rois_blob_this_image
=
np
.
hstack
((
batch_ind
,
rois
))
rois_blob
=
np
.
vstack
((
rois_blob
,
rois_blob_this_image
))
...
...
@@ -65,8 +65,7 @@ def get_minibatch(roidb, num_classes):
return
blobs
def
_sample_rois
(
roidb
,
fg_rois_per_image
,
rois_per_image
,
num_classes
):
"""
Generate a random sample of RoIs comprising foreground and background
"""Generate a random sample of RoIs comprising foreground and background
examples.
"""
# label = class RoI has max overlap with
...
...
@@ -113,8 +112,8 @@ def _sample_rois(roidb, fg_rois_per_image, rois_per_image, num_classes):
return
labels
,
overlaps
,
rois
,
bbox_targets
,
bbox_loss_weights
def
_get_image_blob
(
roidb
,
scale_inds
):
"""
Builds an input blob from the images in the roidb at the specified
scales.
"""
Builds an input blob from the images in the roidb at the specified
scales.
"""
num_images
=
len
(
roidb
)
processed_ims
=
[]
...
...
@@ -134,20 +133,22 @@ def _get_image_blob(roidb, scale_inds):
return
blob
,
im_scales
def
_scale_im_rois
(
im_rois
,
im_scale_factor
):
def
_project_im_rois
(
im_rois
,
im_scale_factor
):
"""Project image RoIs into the rescaled training image."""
rois
=
im_rois
*
im_scale_factor
return
rois
def
_get_bbox_regression_labels
(
bbox_target_data
,
num_classes
):
"""
Bounding-box regression targets are stored in a compact form in the roidb.
"""Bounding-box regression targets are stored in a compact form in the
roidb.
This function expands those targets into the 4-of-4*K representation used
by the network (i.e. only one class has non-zero targets). The loss weights
are similarly expanded.
Returns:
(N, K * 4) blob of regression targets
(N, K * 4) blob of loss weights
bbox_target_data (ndarray): N x 4K blob of regression targets
bbox_loss_weights (ndarray): N x 4K blob of loss weights
"""
clss
=
bbox_target_data
[:,
0
]
bbox_targets
=
np
.
zeros
((
clss
.
size
,
4
*
num_classes
),
dtype
=
np
.
float32
)
...
...
lib/roi_data_layer/roidb.py
View file @
d3c52550
...
...
@@ -5,13 +5,14 @@
# Written by Ross Girshick
# --------------------------------------------------------
"""Transform a roidb into a trainable roidb by adding a bunch of metadata."""
import
numpy
as
np
from
fast_rcnn.config
import
cfg
import
utils.cython_bbox
def
prepare_roidb
(
imdb
):
"""
Enrich the imdb's roidb by adding some derived quantities that
"""Enrich the imdb's roidb by adding some derived quantities that
are useful for training. This function precomputes the maximum
overlap, taken over ground-truth boxes, between each ROI and
each ground-truth box. The class with maximum overlap is also
...
...
@@ -37,6 +38,7 @@ def prepare_roidb(imdb):
assert
all
(
max_classes
[
nonzero_inds
]
!=
0
)
def
add_bbox_regression_targets
(
roidb
):
"""Add information needed to train bounding-box regressors."""
assert
len
(
roidb
)
>
0
assert
'max_classes'
in
roidb
[
0
],
'Did you call prepare_roidb first?'
...
...
@@ -80,6 +82,7 @@ def add_bbox_regression_targets(roidb):
return
means
.
ravel
(),
stds
.
ravel
()
def
_compute_targets
(
rois
,
overlaps
,
labels
):
"""Compute bounding-box regression targets for an image."""
# Ensure ROIs are floats
rois
=
rois
.
astype
(
np
.
float
,
copy
=
False
)
...
...
lib/utils/blob.py
View file @
d3c52550
...
...
@@ -5,10 +5,16 @@
# Written by Ross Girshick
# --------------------------------------------------------
"""Blob helper functions."""
import
numpy
as
np
import
cv2
def
im_list_to_blob
(
ims
):
"""Convert a list of images into a network input.
Assumes images are already prepared (means subtracted, BGR order, ...).
"""
max_shape
=
np
.
array
([
im
.
shape
for
im
in
ims
]).
max
(
axis
=
0
)
num_images
=
len
(
ims
)
blob
=
np
.
zeros
((
num_images
,
max_shape
[
0
],
max_shape
[
1
],
3
),
...
...
@@ -23,6 +29,7 @@ def im_list_to_blob(ims):
return
blob
def
prep_im_for_blob
(
im
,
pixel_means
,
target_size
,
max_size
):
"""Mean subtract and scale an image for use in a blob."""
im
=
im
.
astype
(
np
.
float32
,
copy
=
False
)
im
-=
pixel_means
im_shape
=
im
.
shape
...
...
lib/utils/timer.py
View file @
d3c52550
...
...
@@ -8,6 +8,7 @@
import
time
class
Timer
(
object
):
"""A simple timer."""
def
__init__
(
self
):
self
.
total_time
=
0.
self
.
calls
=
0
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment