fast_rcnn_im_detect.m 4.11 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
% --------------------------------------------------------
% Fast R-CNN
% Copyright (c) 2015 Microsoft
% Licensed under The MIT License [see LICENSE for details]
% Written by Ross Girshick
% --------------------------------------------------------

function dets = fast_rcnn_im_detect(model, im, boxes)
% Perform detection with a Fast R-CNN network given an image and
% object proposals.
%
% Inputs:
%   model  struct; the fields read here are init_key and pixel_means
%   im     image, handed to image_pyramid together with model.pixel_means
%   boxes  object proposals, one per row; columns 1-4 are used as
%          [x1 y1 x2 y2] (1-based, judging by the "- 1" shift applied
%          before the rois are sent to caffe)
%
% Output:
%   dets   (num_classes - 1) x 1 cell array. dets{k} holds the rows
%          [predicted_box score] of class index k + 1 that nms kept.
%          Class index 1 is __background__ and is not returned.

NMS_THRESH = 0.3;

% The model must carry the init key that caffe currently reports.
if model.init_key ~= caffe('get_init_key')
  error('You probably need call fast_rcnn_load_net() first.');
end

% Single-scale pyramid (multiscale = false)
[net_images, level_scales] = image_pyramid(im, model.pixel_means, false);

% One roi per proposal: [pyramid_level x1 y1 x2 y2], shifted to 0-based
% indexing and transposed so the roi values are the fastest dimension
[scaled_boxes, box_levels] = project_im_rois(boxes, level_scales);
roi_blob = permute([box_levels, scaled_boxes] - 1, [2 1]);

net_inputs = {net_images; roi_blob};
fwd_timer = tic();
net_out = caffe('forward', net_inputs);
fprintf('fwd: %.3fs\n', toc(fwd_timer));

% After squeeze + transpose there is one row per proposal
all_deltas = squeeze(net_out{1})';
all_probs = squeeze(net_out{2})';

num_foreground = size(all_probs, 2) - 1;
dets = cell(num_foreground, 1);
for k = 1:num_foreground
  % Foreground class k lives in probability column k + 1 and in the
  % four regression columns 4k+1 .. 4k+4
  delta_cols = (4 * k + 1):(4 * k + 4);
  refined_boxes = bbox_pred(boxes, all_deltas(:, delta_cols));
  scored_boxes = [refined_boxes all_probs(:, k + 1)];
  kept = nms(scored_boxes, NMS_THRESH);
  dets{k} = scored_boxes(kept, :);
end

% ------------------------------------------------------------------------
function [batch, scales] = image_pyramid(im, pixel_means, multiscale)
% ------------------------------------------------------------------------
% Construct an image pyramid that's ready for feeding directly into caffe.
%
% Inputs:
%   im           image, rows x cols x channels. Channels 1-3 are read as
%                R, G, B and reordered to B, G, R. A single-channel image
%                is replicated to three channels first.
%   pixel_means  subtracted from the BGR image with bsxfun, so it must be
%                broadcast-compatible with a rows x cols x 3 array
%                (presumably 1 x 1 x 3 per-channel means -- TODO confirm
%                against the code that builds model.pixel_means)
%   multiscale   false: one level, target shorter side 600, longer side
%                capped at 1000; true: five levels (1200, 864, 688, 576,
%                480), longer side capped at 2000
%
% Outputs:
%   batch   single array of size width x height x 3 x num_levels; levels
%           smaller than the largest one are zero-padded
%   scales  num_levels x 1 column of the scale factors passed to imresize
if ~multiscale
  SCALES = 600;
  MAX_SIZE = 1000;
else
  SCALES = [1200 864 688 576 480];
  MAX_SIZE = 2000;
end
num_levels = numel(SCALES);

im = single(im);
% Grayscale input has no channels 2 and 3 to reorder; replicate it so the
% BGR indexing below does not fail
if size(im, 3) == 1
  im = repmat(im, [1 1 3]);
end
% Convert to BGR
im = im(:, :, [3 2 1]);
% Subtract mean (mean of the image mean--one mean per channel)
im = bsxfun(@minus, im, pixel_means);

im_size = min(size(im, 1), size(im, 2));
im_size_big = max(size(im, 1), size(im, 2));
scale_factors = SCALES ./ im_size;

ims = cell(num_levels, 1);
max_size = [0 0 0];
for i = 1:num_levels
  % Shrink the factor when the longer side would exceed MAX_SIZE
  if round(im_size_big * scale_factors(i)) > MAX_SIZE
    scale_factors(i) = MAX_SIZE / im_size_big;
  end
  ims{i} = imresize(im, scale_factors(i), 'bilinear', ...
                    'antialiasing', false);
  % Track the largest extent over all levels (rows, cols, channels)
  max_size = max(cat(1, max_size, size(ims{i})), [], 1);
end

batch = zeros(max_size(2), max_size(1), 3, num_levels, 'single');
for i = 1:num_levels
  level_im = ims{i};
  level_h = size(level_im, 1);
  level_w = size(level_im, 2);
  % Make width the fastest dimension (for caffe)
  batch(1:level_w, 1:level_h, :, i) = permute(level_im, [2 1 3]);
end
scales = scale_factors';

% ------------------------------------------------------------------------
function [boxes, levels] = project_im_rois(boxes, scales)
% ------------------------------------------------------------------------
% Assign each box to a pyramid level and scale its coordinates to that level.
%
% Inputs:
%   boxes   one box per row; columns 1-4 are used as [x1 y1 x2 y2]
%   scales  scale factor of each pyramid level (a column vector in the
%           caller visible in this file)
%
% Outputs:
%   boxes   input boxes mapped to their level: (boxes - 1) * scale + 1
%   levels  per-box index into scales; the level whose scaled box area is
%           closest to 224 * 224
TARGET_AREA = 224 * 224;

box_w = boxes(:, 3) - boxes(:, 1) + 1;
box_h = boxes(:, 4) - boxes(:, 2) + 1;
box_area = box_w .* box_h;

% Rows are boxes, columns are pyramid levels
area_at_level = bsxfun(@times, box_area, (scales') .^ 2);
[~, levels] = min(abs(area_at_level - TARGET_AREA), [], 2);

% Scale about the 1-based origin: shift to 0-based, scale, shift back
boxes = bsxfun(@times, boxes - 1, scales(levels)) + 1;

% ------------------------------------------------------------------------
function pred_boxes = bbox_pred(boxes, bbox_deltas)
% ------------------------------------------------------------------------
% Apply bounding-box regression deltas to a set of source boxes.
%
% Inputs:
%   boxes        one box per row, columns [x1 y1 x2 y2]
%   bbox_deltas  one row per box, columns [dx dy dw dh]: dx and dy shift
%                the centre in units of the source width and height; dw
%                and dh scale width and height by exp(.)
%
% Output:
%   pred_boxes   one row per box, [x1 y1 x2 y2]; [] when boxes is empty
if isempty(boxes)
  pred_boxes = [];
  return;
end

% Source box geometry (eps keeps zero-extent boxes from having size 0)
box_w = boxes(:, 3) - boxes(:, 1) + eps;
box_h = boxes(:, 4) - boxes(:, 2) + eps;
box_cx = boxes(:, 1) + 0.5 * box_w;
box_cy = boxes(:, 2) + 0.5 * box_h;

% Shift the centres, then rescale the extents
new_cx = (bbox_deltas(:, 1) .* box_w) + box_cx;
new_cy = (bbox_deltas(:, 2) .* box_h) + box_cy;
new_w = exp(bbox_deltas(:, 3)) .* box_w;
new_h = exp(bbox_deltas(:, 4)) .* box_h;

% Back from centre/size form to corner form
half_w = 0.5 * new_w;
half_h = 0.5 * new_h;
pred_boxes = [new_cx - half_w, new_cy - half_h, ...
              new_cx + half_w, new_cy + half_h];