Commit ca9110ae authored by Ross Girshick
Browse files

improve demo; update models to caffe's v1 proto messages

parent 7c99b2fd
......@@ -7,6 +7,7 @@ data/caffe_nets
data/rcnn_models
data/selective_search_data
data/window_files
data/splits
rcnn_config_local.m
cachedir/*
external/caffe
## Contents -- Precomputed R-CNN Models
### R-CNN detectors trained on ILSVRC2013
* `./rcnn_models/ilsvrc2013/rcnn_model.mat`
  * Uses `./caffe_nets/finetune_ilsvrc13_val1+train1k_iter_50000`
  * This detector was trained on val (i.e. val1+val2) plus up to 1000 images per category from train
* `./rcnn_models/ilsvrc2013/bbox_regressor_final.mat`
  * Bounding-box regressor that goes with `./rcnn_models/ilsvrc2013/rcnn_model.mat`
### Caffe CNN
* `./caffe_nets/finetune_ilsvrc13_val1+train1k_iter_50000`
  * CNN fine-tuned on val1 plus up to 1000 images per category from train
  * Initialized from `./caffe_nets/ilsvrc_2012_train_iter_310k` in the main R-CNN data package
### Selective search boxes on val1, val2, val, and test
* `./selective_search_data/ilsvrc13_val1.mat`
* `./selective_search_data/ilsvrc13_val2.mat`
* `./selective_search_data/ilsvrc13_test.mat`
* `./selective_search_data/ilsvrc13_val.mat`
### Image lists in the val1/val2 split (blacklisted images are pre-filtered)
* `./splits/val1.txt`
* `./splits/val2.txt`
function rcnn_demo(use_gpu)
% rcnn_demo(use_gpu)
function rcnn_demo(demo_choice, use_gpu)
% rcnn_demo(demo_choice, use_gpu)
% Run the R-CNN demo on a test image. Set use_gpu = false to run
% in CPU mode. (GPU mode is the default.)
% demo_choice selects between fine-tuned R-CNN models trained on
% 'PASCAL' or 'ILSVRC13'
% AUTORIGHTS
% ---------------------------------------------------------
......@@ -13,7 +15,22 @@ function rcnn_demo(use_gpu)
% this file (or any portion of it) in your project.
% ---------------------------------------------------------
rcnn_model_file = './data/rcnn_models/voc_2012/rcnn_model_finetuned.mat';
if ~exist('demo_choice', 'var') || isempty(demo_choice)
demo_choice = 'PASCAL';
end
switch demo_choice
case 'PASCAL'
% Example using the PASCAL VOC 2007 fine-tuned detectors (20 classes)
rcnn_model_file = './data/rcnn_models/voc_2012/rcnn_model_finetuned.mat';
im = imread('./examples/images/000084.jpg');
case 'ILSVRC13'
% Example using the ILSVRC13 fine-tuned detectors (200 classes)
rcnn_model_file = './data/rcnn_models/ilsvrc2013/rcnn_model.mat';
im = imread('./examples/images/fish-bike.jpg');
otherwise
error('unknown demo ''%s'' [valid options: ''PASCAL'' or ''ILSVRC13'']', demo_choice);
end
if ~exist(rcnn_model_file, 'file')
error('You need to download the R-CNN precomputed models. See README.md for details.');
......@@ -25,27 +42,38 @@ end
modes = {'CPU', 'GPU'};
fprintf('~~~~~~~~~~~~~~~~~~~\n');
fprintf('Welcome to the %s demo\n', demo_choice);
fprintf('Running in %s mode\n', modes{use_gpu+1});
fprintf('(To run in %s mode, call rcnn_demo(%d) instead)\n', ...
fprintf('(To run in %s mode, call rcnn_demo(demo_choice, %d) instead)\n', ...
modes{~use_gpu+1}, ~use_gpu);
fprintf('Press any key to continue\n');
pause;
% Initialization only needs to happen once (so this time isn't counted
% when timing detection).
fprintf('Initializing R-CNN model (this might take a little while)\n');
rcnn_model = rcnn_load_model(rcnn_model_file, use_gpu);
fprintf('done\n');
im = imread('./000084.jpg');
th = tic;
dets = rcnn_detect(im, rcnn_model);
fprintf('Total %d-class detection time: %.3fs\n', ...
length(rcnn_model.classes), toc(th));
% show top scoring bicycle detection
showboxes(im, dets{2}(1,:));
title(sprintf('bicycle conf = %.3f', dets{2}(1,end)));
ind = strmatch('bicycle', rcnn_model.classes);
showboxes(im, dets{ind}(1,:));
title(sprintf('bicycle score = %.3f', dets{ind}(1,end)));
drawnow;
fprintf('Press any key to see the top scoring person detection\n');
fprintf('Showing the top scoring bicycle detection\n');
fprintf('Press any key to continue\n');
pause;
% show top scoring person detection
showboxes(im, dets{15}(1,:));
title(sprintf('person conf = %.3f', dets{15}(1,end)));
ind = strmatch('person', rcnn_model.classes);
showboxes(im, dets{ind}(1,:));
title(sprintf('person score = %.3f', dets{ind}(1,end)));
drawnow;
fprintf('Showing the top scoring person detection\n');
......@@ -13,7 +13,8 @@ function dets = rcnn_detect(im, rcnn_model)
fprintf('Computing candidate regions...');
th = tic();
fast_mode = true;
boxes = selective_search_boxes(im, fast_mode);
im_width = 500;
boxes = selective_search_boxes(im, fast_mode, im_width);
% compat: change coordinate order from [y1 x1 y2 x2] to [x1 y1 x2 y2]
boxes = boxes(:, [2 1 4 3]);
fprintf('found %d candidates (in %.3fs).\n', size(boxes,1), toc(th));
......@@ -25,12 +26,15 @@ feat = rcnn_features(im, boxes, rcnn_model);
feat = rcnn_scale_features(feat, rcnn_model.training_opts.feat_norm_mean);
fprintf('done (in %.3fs).\n', toc(th));
fprintf('Scoring regions with detectors and applying NMS...');
% compute scores for each candidate [num_boxes x num_classes]
fprintf('Scoring regions with detectors...');
th = tic();
scores = bsxfun(@plus, feat*rcnn_model.detectors.W, rcnn_model.detectors.B);
fprintf('done (in %.3fs)\n', toc(th));
% apply NMS to each class and return final scored detections
fprintf('Applying NMS...');
th = tic();
num_classes = length(rcnn_model.classes);
dets = cell(num_classes, 1);
for i = 1:num_classes
......
# Network input: one blob named "data" with dimensions 10 x 3 x 227 x 227.
# NOTE(review): presumably ordered (batch, channels, height, width) -- confirm.
# NOTE(review): the nested `layers { layer { ... } }` syntax with lowercase
# string layer types looks like the pre-V1 Caffe format -- confirm which side
# of the migration this definition belongs to.
input: "data"
input_dim: 10
input_dim: 3
input_dim: 227
input_dim: 227
# conv1: convolution, 96 outputs, kernel size 11, stride 4; "data" -> "conv1".
layers {
layer {
name: "conv1"
type: "conv"
num_output: 96
kernelsize: 11
stride: 4
# Weights are filled from a Gaussian (std 0.01); bias starts at constant 0.
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0.
}
# Two entries each for blobs_lr and weight_decay.
# NOTE(review): presumably first = weights, second = bias -- confirm.
blobs_lr: 1.
blobs_lr: 2.
weight_decay: 1.
weight_decay: 0.
}
bottom: "data"
top: "conv1"
}
# relu1: ReLU computed in place (bottom and top are both "conv1").
layers {
layer {
name: "relu1"
type: "relu"
}
bottom: "conv1"
top: "conv1"
}
# pool1: max pooling, kernel size 3, stride 2; "conv1" -> "pool1".
layers {
layer {
name: "pool1"
type: "pool"
pool: MAX
kernelsize: 3
stride: 2
}
bottom: "conv1"
top: "pool1"
}
# norm1: "lrn" layer with local_size 5, alpha 0.0001, beta 0.75;
# "pool1" -> "norm1".
layers {
layer {
name: "norm1"
type: "lrn"
local_size: 5
alpha: 0.0001
beta: 0.75
}
bottom: "pool1"
top: "norm1"
}
# pad2: explicit padding layer (pad 2) whose output feeds conv2;
# "norm1" -> "pad2".
layers {
layer {
name: "pad2"
type: "padding"
pad: 2
}
bottom: "norm1"
top: "pad2"
}
# conv2: convolution, 256 outputs, group 2, kernel size 5; "pad2" -> "conv2".
layers {
layer {
name: "conv2"
type: "conv"
num_output: 256
group: 2
kernelsize: 5
# Weights are filled from a Gaussian (std 0.01); bias starts at constant 1.
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 1.
}
# Two entries each for blobs_lr and weight_decay.
# NOTE(review): presumably first = weights, second = bias -- confirm.
blobs_lr: 1.
blobs_lr: 2.
weight_decay: 1.
weight_decay: 0.
}
bottom: "pad2"
top: "conv2"
}
# relu2: ReLU computed in place (bottom and top are both "conv2").
layers {
layer {
name: "relu2"
type: "relu"
}
bottom: "conv2"
top: "conv2"
}
# pool2: max pooling, kernel size 3, stride 2; "conv2" -> "pool2".
layers {
layer {
name: "pool2"
type: "pool"
pool: MAX
kernelsize: 3
stride: 2
}
bottom: "conv2"
top: "pool2"
}
# norm2: "lrn" layer with local_size 5, alpha 0.0001, beta 0.75;
# "pool2" -> "norm2".
layers {
layer {
name: "norm2"
type: "lrn"
local_size: 5
alpha: 0.0001
beta: 0.75
}
bottom: "pool2"
top: "norm2"
}
# pad3: explicit padding layer (pad 1) whose output feeds conv3;
# "norm2" -> "pad3".
layers {
layer {
name: "pad3"
type: "padding"
pad: 1
}
bottom: "norm2"
top: "pad3"
}
# conv3: convolution, 384 outputs, kernel size 3, no group setting;
# "pad3" -> "conv3".
layers {
layer {
name: "conv3"
type: "conv"
num_output: 384
kernelsize: 3
# Weights are filled from a Gaussian (std 0.01); bias starts at constant 0.
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0.
}
# Two entries each for blobs_lr and weight_decay.
# NOTE(review): presumably first = weights, second = bias -- confirm.
blobs_lr: 1.
blobs_lr: 2.
weight_decay: 1.
weight_decay: 0.
}
bottom: "pad3"
top: "conv3"
}
# relu3: ReLU computed in place (bottom and top are both "conv3").
layers {
layer {
name: "relu3"
type: "relu"
}
bottom: "conv3"
top: "conv3"
}
# pad4: explicit padding layer (pad 1) whose output feeds conv4;
# "conv3" -> "pad4".
layers {
layer {
name: "pad4"
type: "padding"
pad: 1
}
bottom: "conv3"
top: "pad4"
}
# conv4: convolution, 384 outputs, group 2, kernel size 3; "pad4" -> "conv4".
layers {
layer {
name: "conv4"
type: "conv"
num_output: 384
group: 2
kernelsize: 3
# Weights are filled from a Gaussian (std 0.01); bias starts at constant 1.
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 1.
}
blobs_lr: 1.
blobs_lr: 2.
weight_decay: 1.
weight_decay: 0.
}
bottom: "pad4"
top: "conv4"
}
# relu4: ReLU computed in place (bottom and top are both "conv4").
layers {
layer {
name: "relu4"
type: "relu"
}
bottom: "conv4"
top: "conv4"
}
# pad5: explicit padding layer (pad 1) whose output feeds conv5;
# "conv4" -> "pad5".
layers {
layer {
name: "pad5"
type: "padding"
pad: 1
}
bottom: "conv4"
top: "pad5"
}
# conv5: convolution, 256 outputs, group 2, kernel size 3; "pad5" -> "conv5".
layers {
layer {
name: "conv5"
type: "conv"
num_output: 256
group: 2
kernelsize: 3
# Weights are filled from a Gaussian (std 0.01); bias starts at constant 1.
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 1.
}
blobs_lr: 1.
blobs_lr: 2.
weight_decay: 1.
weight_decay: 0.
}
bottom: "pad5"
top: "conv5"
}
# relu5: ReLU computed in place (bottom and top are both "conv5").
layers {
layer {
name: "relu5"
type: "relu"
}
bottom: "conv5"
top: "conv5"
}
# pool5: max pooling, kernel size 3, stride 2; "conv5" -> "pool5".
layers {
layer {
name: "pool5"
type: "pool"
kernelsize: 3
pool: MAX
stride: 2
}
bottom: "conv5"
top: "pool5"
}
# fc6: inner-product layer with 4096 outputs; "pool5" -> "fc6".
layers {
layer {
name: "fc6"
type: "innerproduct"
num_output: 4096
# Weights are filled from a Gaussian (std 0.005); bias starts at constant 1.
weight_filler {
type: "gaussian"
std: 0.005
}
bias_filler {
type: "constant"
value: 1.
}
# Two entries each for blobs_lr and weight_decay.
# NOTE(review): presumably first = weights, second = bias -- confirm.
blobs_lr: 1.
blobs_lr: 2.
weight_decay: 1.
weight_decay: 0.
}
bottom: "pool5"
top: "fc6"
}
# relu6: ReLU computed in place (bottom and top are both "fc6").
layers {
layer {
name: "relu6"
type: "relu"
}
bottom: "fc6"
top: "fc6"
}
# drop6: dropout with ratio 0.5, in place on "fc6".
layers {
layer {
name: "drop6"
type: "dropout"
dropout_ratio: 0.5
}
bottom: "fc6"
top: "fc6"
}
# fc7: inner-product layer with 4096 outputs; "fc6" -> "fc7".
layers {
layer {
name: "fc7"
type: "innerproduct"
num_output: 4096
# Weights are filled from a Gaussian (std 0.005); bias starts at constant 1.
weight_filler {
type: "gaussian"
std: 0.005
}
bias_filler {
type: "constant"
value: 1.
}
blobs_lr: 1.
blobs_lr: 2.
weight_decay: 1.
weight_decay: 0.
}
bottom: "fc6"
top: "fc7"
}
# relu7: ReLU computed in place (bottom and top are both "fc7").
layers {
layer {
name: "relu7"
type: "relu"
}
bottom: "fc7"
top: "fc7"
}
# drop7: dropout with ratio 0.5, in place on "fc7".
layers {
layer {
name: "drop7"
type: "dropout"
dropout_ratio: 0.5
}
bottom: "fc7"
top: "fc7"
}
# fc8: inner-product layer with 1000 outputs; "fc7" -> "fc8".
layers {
layer {
name: "fc8"
type: "innerproduct"
num_output: 1000
# Weights are filled from a Gaussian (std 0.01); bias starts at constant 0.
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
blobs_lr: 1.
blobs_lr: 2.
weight_decay: 1.
weight_decay: 0.
}
bottom: "fc7"
top: "fc8"
}
# prob: softmax over the fc8 outputs; "fc8" -> "prob".
layers {
layer {
name: "prob"
type: "softmax"
}
bottom: "fc8"
top: "prob"
}
# Network input: one blob named "data" with dimensions 256 x 3 x 227 x 227.
# NOTE(review): presumably ordered (batch, channels, height, width) -- confirm.
input: "data"
input_dim: 256
input_dim: 3
input_dim: 227
input_dim: 227
# conv1: convolution, 96 outputs, kernel size 11, stride 4; "data" -> "conv1".
layers {
layer {
name: "conv1"
type: "conv"
num_output: 96
kernelsize: 11
stride: 4
# Weights are filled from a Gaussian (std 0.01); bias starts at constant 0.
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0.
}
# Two entries each for blobs_lr and weight_decay.
# NOTE(review): presumably first = weights, second = bias -- confirm.
blobs_lr: 1.
blobs_lr: 2.
weight_decay: 1.
weight_decay: 0.
}
bottom: "data"
top: "conv1"
}
# relu1: ReLU computed in place (bottom and top are both "conv1").
layers {
layer {
name: "relu1"
type: "relu"
}
bottom: "conv1"
top: "conv1"
}
# pool1: max pooling, kernel size 3, stride 2; "conv1" -> "pool1".
layers {
layer {
name: "pool1"
type: "pool"
pool: MAX
kernelsize: 3
stride: 2
}
bottom: "conv1"
top: "pool1"
}
# norm1: "lrn" layer with local_size 5, alpha 0.0001, beta 0.75;
# "pool1" -> "norm1".
layers {
layer {
name: "norm1"
type: "lrn"
local_size: 5
alpha: 0.0001
beta: 0.75
}
bottom: "pool1"
top: "norm1"
}
# pad2: explicit padding layer (pad 2) whose output feeds conv2;
# "norm1" -> "pad2".
layers {
layer {
name: "pad2"
type: "padding"
pad: 2
}
bottom: "norm1"
top: "pad2"
}
# conv2: convolution, 256 outputs, group 2, kernel size 5; "pad2" -> "conv2".
layers {
layer {
name: "conv2"
type: "conv"
num_output: 256
group: 2
kernelsize: 5
# Weights are filled from a Gaussian (std 0.01); bias starts at constant 1.
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 1.
}
# Two entries each for blobs_lr and weight_decay.
# NOTE(review): presumably first = weights, second = bias -- confirm.
blobs_lr: 1.
blobs_lr: 2.
weight_decay: 1.
weight_decay: 0.
}
bottom: "pad2"
top: "conv2"
}
# relu2: ReLU computed in place (bottom and top are both "conv2").
layers {
layer {
name: "relu2"
type: "relu"
}
bottom: "conv2"
top: "conv2"
}
# pool2: max pooling, kernel size 3, stride 2; "conv2" -> "pool2".
layers {
layer {
name: "pool2"
type: "pool"
pool: MAX
kernelsize: 3
stride: 2
}
bottom: "conv2"
top: "pool2"
}
# norm2: "lrn" layer with local_size 5, alpha 0.0001, beta 0.75;
# "pool2" -> "norm2".
layers {
layer {
name: "norm2"
type: "lrn"
local_size: 5
alpha: 0.0001
beta: 0.75
}
bottom: "pool2"
top: "norm2"
}
# pad3: explicit padding layer (pad 1); "norm2" -> "pad3".
layers {
layer {
name: "pad3"
type: "padding"
pad: 1
}
bottom: "norm2"
top: "pad3"
}
layers {
layer {
name: "conv3"
type: "conv"
num_output: 384
kernelsize: 3
weight_filler {
type: "gaussian"