% --------------------------------------------------------
% Fast R-CNN
% Copyright (c) 2015 Microsoft
% Licensed under The MIT License [see LICENSE for details]
% Written by Ross Girshick
% --------------------------------------------------------

function dets = fast_rcnn_im_detect(model, im, boxes)
% Perform detection with a Fast R-CNN network given an image and
% object proposals.
%
% Inputs
%   model   struct; this function reads model.init_key and
%           model.pixel_means
%   im      input image; channels are reordered [3 2 1] before use
%           (assumes RGB input -- confirm against caller). A
%           single-channel image is replicated to three channels.
%   boxes   N x 4 proposals, columns [x1 y1 x2 y2], 1-based pixel
%           coordinates in the original image
%
% Output
%   dets    (num_classes - 1) x 1 cell array. dets{j - 1} holds the rows
%           [x1 y1 x2 y2 score] for class index j that survive NMS.
%           Class index 1 (__background__) is not returned.

% The loaded network is identified by its init key; a mismatch means the
% caffe wrapper is not holding the network this model refers to.
if model.init_key ~= caffe('get_init_key')
  error('You probably need call fast_rcnn_load_net() first.');
end

% Single-scale pyramid (multiscale = false)
[im_batch, scales] = image_pyramid(im, model.pixel_means, false);

[feat_pyra_boxes, feat_pyra_levels] = project_im_rois(boxes, scales);
% Each roi row is [pyramid_level x1 y1 x2 y2]
rois = cat(2, feat_pyra_levels, feat_pyra_boxes);
% Adjust to 0-based indexing and make roi info the fastest dimension
rois = rois - 1;
rois = permute(rois, [2 1]);

input_blobs = cell(2, 1);
input_blobs{1} = im_batch;
input_blobs{2} = rois;
th = tic();
blobs_out = caffe('forward', input_blobs);
fprintf('fwd: %.3fs\n', toc(th));

% After squeeze + transpose there is one row per roi
% (output 1: box regression deltas, output 2: class probabilities)
bbox_deltas = squeeze(blobs_out{1})';
probs = squeeze(blobs_out{2})';

num_classes = size(probs, 2);
dets = cell(num_classes - 1, 1);
NMS_THRESH = 0.3;
% class index 1 is __background__, so we don't return it
for j = 2:num_classes
  cls_probs = probs(:, j);
  % Each class owns four consecutive delta columns
  cls_deltas = bbox_deltas(:, (1 + (j - 1) * 4):(j * 4));
  pred_boxes = bbox_pred(boxes, cls_deltas);
  cls_dets = [pred_boxes cls_probs];
  keep = nms(cls_dets, NMS_THRESH);
  cls_dets = cls_dets(keep, :);
  dets{j - 1} = cls_dets;
end

% ------------------------------------------------------------------------
function [batch, scales] = image_pyramid(im, pixel_means, multiscale)
% ------------------------------------------------------------------------
% Construct an image pyramid that's ready for feeding directly into caffe
%
% Inputs
%   im           input image (single-channel images are replicated to
%                three channels)
%   pixel_means  per-channel mean subtracted via bsxfun (assumed to be
%                in BGR order and broadcastable against H x W x 3 --
%                confirm against the model loader)
%   multiscale   false: one level (shorter side 600, longer side capped
%                at 1000); true: five levels (longer side capped at 2000)
%
% Outputs
%   batch   W x H x 3 x num_levels single array; smaller levels are
%           zero-padded up to the largest level
%   scales  num_levels x 1 column of the scale factors actually applied
if ~multiscale
  SCALES = [600];
  MAX_SIZE = 1000;
else
  SCALES = [1200 864 688 576 480];
  MAX_SIZE = 2000;
end
num_levels = length(SCALES);

im = single(im);
% A single-channel image cannot be indexed with [3 2 1]; replicate it so
% the channel swap and the 3-channel batch below both work.
if size(im, 3) == 1
  im = repmat(im, [1 1 3]);
end
% Convert to BGR
im = im(:, :, [3 2 1]);
% Subtract mean (mean of the image mean--one mean per channel)
im = bsxfun(@minus, im, pixel_means);

im_orig = im;
im_size = min([size(im_orig, 1) size(im_orig, 2)]);
im_size_big = max([size(im_orig, 1) size(im_orig, 2)]);
% Scale so that the shorter side matches each target size
scale_factors = SCALES ./ im_size;

ims = cell(num_levels, 1);
max_size = [0 0 0];
for i = 1:num_levels
  % Cap the scale so the longer side does not exceed MAX_SIZE
  if round(im_size_big * scale_factors(i)) > MAX_SIZE
    scale_factors(i) = MAX_SIZE / im_size_big;
  end
  ims{i} = imresize(im_orig, scale_factors(i), 'bilinear', ...
                    'antialiasing', false);
  % Track the largest [height width channels] over all levels
  max_size = max(cat(1, max_size, size(ims{i})), [], 1);
end

batch = zeros(max_size(2), max_size(1), 3, num_levels, 'single');
for i = 1:num_levels
  im = ims{i};
  im_sz = size(im);
  im_sz = im_sz(1:2);
  % Make width the fastest dimension (for caffe)
  im = permute(im, [2 1 3]);
  batch(1:im_sz(2), 1:im_sz(1), :, i) = im;
end
scales = scale_factors';

% ------------------------------------------------------------------------
function [boxes, levels] = project_im_rois(boxes, scales)
% ------------------------------------------------------------------------
% Assign each box to a pyramid level and scale it into that level's
% coordinates.
%
% Inputs
%   boxes   N x 4, columns [x1 y1 x2 y2], 1-based
%   scales  num_levels x 1 column of scale factors
%
% Outputs
%   boxes   input boxes scaled by the factor of their assigned level
%           (still 1-based)
%   levels  N x 1 index (1-based) of the level whose scaled box area is
%           closest to 224 * 224
widths = boxes(:,3) - boxes(:,1) + 1;
heights = boxes(:,4) - boxes(:,2) + 1;

areas = widths .* heights;
% N x num_levels: area of every box at every pyramid level
scaled_areas = bsxfun(@times, areas, (scales.^2)');
diff_areas = abs(scaled_areas - (224 * 224));
[~, levels] = min(diff_areas, [], 2);

% Scale about a 0-based origin, then shift back to 1-based
boxes = boxes - 1;
boxes = bsxfun(@times, boxes, scales(levels));
boxes = boxes + 1;

% ------------------------------------------------------------------------
function pred_boxes = bbox_pred(boxes, bbox_deltas)
% ------------------------------------------------------------------------
% Apply bounding-box regression deltas to source boxes.
%
% Inputs
%   boxes        N x 4, columns [x1 y1 x2 y2]
%   bbox_deltas  N x 4, columns [dx dy dw dh]: center shifts expressed as
%                fractions of the source width/height, and log-space
%                width/height scalings
%
% Output
%   pred_boxes   N x 4 regressed boxes [x1 y1 x2 y2]; [] when boxes is
%                empty
if isempty(boxes)
  pred_boxes = [];
  return;
end

Y = bbox_deltas;

% Read out predictions
dst_ctr_x = Y(:, 1);
dst_ctr_y = Y(:, 2);
dst_scl_x = Y(:, 3);
dst_scl_y = Y(:, 4);

% eps keeps degenerate (zero-extent) boxes from having zero width/height
src_w = boxes(:, 3) - boxes(:, 1) + eps;
src_h = boxes(:, 4) - boxes(:, 2) + eps;
src_ctr_x = boxes(:, 1) + 0.5 * src_w;
src_ctr_y = boxes(:, 2) + 0.5 * src_h;

pred_ctr_x = (dst_ctr_x .* src_w) + src_ctr_x;
pred_ctr_y = (dst_ctr_y .* src_h) + src_ctr_y;
pred_w = exp(dst_scl_x) .* src_w;
pred_h = exp(dst_scl_y) .* src_h;
pred_boxes = [pred_ctr_x - 0.5 * pred_w, pred_ctr_y - 0.5 * pred_h, ...
              pred_ctr_x + 0.5 * pred_w, pred_ctr_y + 0.5 * pred_h];