objdetect.hpp
37.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_OBJDETECT_HPP__
#define __OPENCV_OBJDETECT_HPP__
#include "opencv2/core/core.hpp"
#ifdef __cplusplus
#include <map>
#include <deque>
extern "C" {
#endif
/****************************************************************************************\
* Haar-like Object Detection functions *
\****************************************************************************************/
/* Signature value that CV_IS_HAAR_CLASSIFIER looks for in the `flags` field of a cascade. */
#define CV_HAAR_MAGIC_VAL 0x42500000
/* Type-name string for Haar cascades.
   NOTE(review): presumably the name used for type registration / persistence - confirm. */
#define CV_TYPE_NAME_HAAR "opencv-haar-classifier"
/* Evaluates to non-zero when `haar` is non-NULL and its `flags`, masked with
   CV_MAGIC_MASK, equal CV_HAAR_MAGIC_VAL. The argument is cast to
   const CvHaarClassifierCascade* without any further checking. */
#define CV_IS_HAAR_CLASSIFIER( haar ) \
((haar) != NULL && \
(((const CvHaarClassifierCascade*)(haar))->flags & CV_MAGIC_MASK)==CV_HAAR_MAGIC_VAL)
/* Number of weighted rectangles stored in each CvHaarFeature (size of its rect[] array). */
#define CV_HAAR_FEATURE_MAX 3
/* A single Haar-like feature: CV_HAAR_FEATURE_MAX rectangles, each paired with a weight. */
typedef struct CvHaarFeature
{
/* NOTE(review): presumably non-zero for a tilted (rotated) feature - confirm. */
int tilted;
struct
{
/* Rectangle of the feature and the weight applied to it. */
CvRect r;
float weight;
} rect[CV_HAAR_FEATURE_MAX];
} CvHaarFeature;
/* One classifier of a Haar cascade stage.
   NOTE(review): the pointer members look like parallel arrays whose length is
   related to `count`; exact lengths and ownership are not visible here - confirm. */
typedef struct CvHaarClassifier
{
int count;
CvHaarFeature* haar_feature;
float* threshold;
int* left;
int* right;
float* alpha;
} CvHaarClassifier;
/* One stage of a Haar cascade: a set of classifiers plus a stage threshold.
   NOTE(review): `count` is presumably the number of entries in `classifier`, and
   next/child/parent presumably are stage indices linking stages together - confirm. */
typedef struct CvHaarStageClassifier
{
int count;
float threshold;
CvHaarClassifier* classifier;
int next;
int child;
int parent;
} CvHaarStageClassifier;
/* Internal cascade representation; only forward-declared in this header (opaque to users). */
typedef struct CvHidHaarClassifierCascade CvHidHaarClassifierCascade;
/* Haar classifier cascade.
   `flags` carries the signature tested by CV_IS_HAAR_CLASSIFIER.
   NOTE(review): `count` is presumably the number of entries in `stage_classifier`;
   real_window_size/scale presumably describe the currently selected detection
   scale - confirm against the implementation. */
typedef struct CvHaarClassifierCascade
{
int flags;
int count;
CvSize orig_window_size;
CvSize real_window_size;
double scale;
CvHaarStageClassifier* stage_classifier;
CvHidHaarClassifierCascade* hid_cascade;
} CvHaarClassifierCascade;
/* A rectangle together with a neighbor count.
   NOTE(review): presumably the element type of the sequence returned by
   cvHaarDetectObjects (averaged rectangle + number of merged neighbors) - confirm. */
typedef struct CvAvgComp
{
CvRect rect;
int neighbors;
} CvAvgComp;
/* Loads haar classifier cascade from a directory.
It is obsolete: convert your cascade to xml and use cvLoad instead.
directory        - path of the directory holding the cascade
orig_window_size - window size of the cascade being loaded
NOTE(review): the result is presumably released with
cvReleaseHaarClassifierCascade - confirm. */
CVAPI(CvHaarClassifierCascade*) cvLoadHaarClassifierCascade(
const char* directory, CvSize orig_window_size);
/* Releases a haar classifier cascade. Takes the address of the caller's pointer
   (NOTE(review): presumably so the pointer can be reset to NULL - confirm). */
CVAPI(void) cvReleaseHaarClassifierCascade( CvHaarClassifierCascade** cascade );
/* Bit flags (distinct powers of two, so they can be OR-ed together).
   The same values appear in the C++ cv::CASCADE_* enumeration later in this file.
   NOTE(review): presumably the values accepted by the `flags` argument of
   cvHaarDetectObjects - confirm. */
#define CV_HAAR_DO_CANNY_PRUNING 1
#define CV_HAAR_SCALE_IMAGE 2
#define CV_HAAR_FIND_BIGGEST_OBJECT 4
#define CV_HAAR_DO_ROUGH_SEARCH 8
/* Disabled C prototype of cvHaarDetectObjectsForROC, kept for reference; the
   active declaration is the C++ one after the extern "C" section. */
//CVAPI(CvSeq*) cvHaarDetectObjectsForROC( const CvArr* image,
// CvHaarClassifierCascade* cascade, CvMemStorage* storage,
// CvSeq** rejectLevels, CvSeq** levelWeightds,
// double scale_factor CV_DEFAULT(1.1),
// int min_neighbors CV_DEFAULT(3), int flags CV_DEFAULT(0),
// CvSize min_size CV_DEFAULT(cvSize(0,0)), CvSize max_size CV_DEFAULT(cvSize(0,0)),
// bool outputRejectLevels = false );
/* Runs a Haar cascade over `image` and returns the detections as a sequence.
   Defaults: scale_factor 1.1, min_neighbors 3, flags 0, min_size and max_size (0,0).
   NOTE(review): the returned sequence is presumably allocated in `storage` and
   holds CvAvgComp elements; `flags` presumably takes CV_HAAR_* bits - confirm. */
CVAPI(CvSeq*) cvHaarDetectObjects( const CvArr* image,
CvHaarClassifierCascade* cascade, CvMemStorage* storage,
double scale_factor CV_DEFAULT(1.1),
int min_neighbors CV_DEFAULT(3), int flags CV_DEFAULT(0),
CvSize min_size CV_DEFAULT(cvSize(0,0)), CvSize max_size CV_DEFAULT(cvSize(0,0)));
/* sets images for haar classifier cascade
   sum, sqsum, tilted_sum - images handed to the cascade
                            (NOTE(review): presumably integral images - confirm)
   scale                  - scale associated with the cascade window */
CVAPI(void) cvSetImagesForHaarClassifierCascade( CvHaarClassifierCascade* cascade,
const CvArr* sum, const CvArr* sqsum,
const CvArr* tilted_sum, double scale );
/* runs the cascade on the specified window
   pt          - location of the window
   start_stage - stage to start from (defaults to 0)
   NOTE(review): the meaning of the int result is not visible here - confirm. */
CVAPI(int) cvRunHaarClassifierCascade( const CvHaarClassifierCascade* cascade,
CvPoint pt, int start_stage CV_DEFAULT(0));
/****************************************************************************************\
* Latent SVM Object Detection functions *
\****************************************************************************************/
// DataType: STRUCT position
// Structure describes the position of the filter in the feature pyramid
// l - level in the feature pyramid
// (x, y) - coordinate in level l
// Note: the members are declared in the order x, y, l.
typedef struct CvLSVMFilterPosition
{
int x;
int y;
int l;
} CvLSVMFilterPosition;
// DataType: STRUCT filterObject
// Description of the filter, which corresponds to the part of the object
// V - ideal (penalty = 0) position of the partial filter
// from the root filter position (V_i in the paper)
// fineFunction - vector describing the penalty function (d_i in the paper),
// written below as pf[0..3]:
// pf[0] * x + pf[1] * y + pf[2] * x^2 + pf[3] * y^2
// FILTER DESCRIPTION
// Rectangular map (sizeX x sizeY),
// every cell stores feature vector (dimension = p)
// (NOTE(review): p is presumably the numFeatures member - confirm)
// H - matrix of feature vectors
// to set and get feature vectors (i,j)
// used formula H[(j * sizeX + i) * p + k], where
// k - component of feature vector in cell (i, j)
// END OF FILTER DESCRIPTION
typedef struct CvLSVMFilterObject{
CvLSVMFilterPosition V;
float fineFunction[4];
int sizeX;
int sizeY;
int numFeatures;
float *H;
} CvLSVMFilterObject;
// data type: STRUCT CvLatentSvmDetector
// structure contains internal representation of trained Latent SVM detector
// num_filters - total number of filters (root plus part) in model
// num_components - number of components in model
// num_part_filters - array containing number of part filters for each component
// filters - root and part filters for all model components
// (array of pointers to CvLSVMFilterObject)
// b - biases for all model components
// score_threshold - confidence level threshold
// Instances are obtained from cvLoadLatentSvmDetector and released with
// cvReleaseLatentSvmDetector (both declared below).
typedef struct CvLatentSvmDetector
{
int num_filters;
int num_components;
int* num_part_filters;
CvLSVMFilterObject** filters;
float* b;
float score_threshold;
}
CvLatentSvmDetector;
// data type: STRUCT CvObjectDetection
// structure contains the bounding box and confidence level for detected object
// rect - bounding box for a detected object
// score - confidence level
// This is the element type named in the description of
// cvLatentSvmDetectObjects' result.
typedef struct CvObjectDetection
{
CvRect rect;
float score;
} CvObjectDetection;
//////////////// Object Detection using Latent SVM //////////////
/*
// load trained detector from a file
//
// API
// CvLatentSvmDetector* cvLoadLatentSvmDetector(const char* filename);
// INPUT
// filename - path to the file containing the parameters of
//            trained Latent SVM detector
// OUTPUT
// trained Latent SVM detector in internal representation
// (release it with cvReleaseLatentSvmDetector)
*/
CVAPI(CvLatentSvmDetector*) cvLoadLatentSvmDetector(const char* filename);
/*
// release memory allocated for CvLatentSvmDetector structure
//
// API
// void cvReleaseLatentSvmDetector(CvLatentSvmDetector** detector);
// INPUT
// detector - address of the pointer to the CvLatentSvmDetector structure
//            to be released
// OUTPUT
// none (the function returns void)
*/
CVAPI(void) cvReleaseLatentSvmDetector(CvLatentSvmDetector** detector);
/*
// find rectangular regions in the given image that are likely
// to contain objects and corresponding confidence levels
//
// API
// CvSeq* cvLatentSvmDetectObjects(IplImage* image,
// CvLatentSvmDetector* detector,
// CvMemStorage* storage,
// float overlap_threshold = 0.5f,
// int numThreads = -1);
// INPUT
// image - image to detect objects in
// detector - Latent SVM detector in internal representation
// storage - memory storage to store the resultant sequence
// of the object candidate rectangles
// overlap_threshold - threshold for the non-maximum suppression algorithm
// = 0.5f [here will be the reference to original paper]
// numThreads - number of threads, defaults to -1
// (NOTE(review): -1 presumably lets the implementation choose - confirm)
// OUTPUT
// sequence of detected objects (bounding boxes and confidence levels stored in CvObjectDetection structures)
*/
CVAPI(CvSeq*) cvLatentSvmDetectObjects(IplImage* image,
CvLatentSvmDetector* detector,
CvMemStorage* storage,
float overlap_threshold CV_DEFAULT(0.5f),
int numThreads CV_DEFAULT(-1));
#ifdef __cplusplus
}
// C++-only variant of cvHaarDetectObjects (it takes std::vector references, so it
// is declared after the extern "C" section is closed). It has the same
// image/cascade/storage/scale/neighbors/flags/size parameters and defaults as
// cvHaarDetectObjects, plus:
//   rejectLevels, levelWeightds - caller-supplied vectors
//   outputRejectLevels          - defaults to false
// NOTE(review): the two vectors are presumably filled with per-detection reject
// levels and weights only when outputRejectLevels is true - confirm.
CV_EXPORTS CvSeq* cvHaarDetectObjectsForROC( const CvArr* image,
CvHaarClassifierCascade* cascade, CvMemStorage* storage,
std::vector<int>& rejectLevels, std::vector<double>& levelWeightds,
double scale_factor CV_DEFAULT(1.1),
int min_neighbors CV_DEFAULT(3), int flags CV_DEFAULT(0),
CvSize min_size CV_DEFAULT(cvSize(0,0)), CvSize max_size CV_DEFAULT(cvSize(0,0)),
bool outputRejectLevels = false );
namespace cv
{
///////////////////////////// Object Detection ////////////////////////////
/*
* This is a class wrapping up the structure CvLatentSvmDetector and functions working with it.
* The class goals are:
* 1) provide c++ interface;
* 2) make it possible to load and detect more than one class (model) unlike CvLatentSvmDetector.
*/
class CV_EXPORTS LatentSvmDetector
{
public:
// One detection: bounding box, confidence score and ID of the detected class.
struct CV_EXPORTS ObjectDetection
{
ObjectDetection();
// classID defaults to -1 when it is not supplied.
ObjectDetection( const Rect& rect, float score, int classID=-1 );
Rect rect;
float score;
int classID;
};
LatentSvmDetector();
// Constructs from a list of model files; classNames is optional and defaults
// to an empty vector.
LatentSvmDetector( const vector<string>& filenames, const vector<string>& classNames=vector<string>() );
virtual ~LatentSvmDetector();
virtual void clear();
virtual bool empty() const;
// Loads models from `filenames`; classNames is optional (empty by default).
// NOTE(review): the bool result presumably reports whether loading succeeded - confirm.
bool load( const vector<string>& filenames, const vector<string>& classNames=vector<string>() );
// Detects objects in `image` and reports them through `objectDetections`.
// overlapThreshold defaults to 0.5f and numThreads to -1, the same defaults
// as the C function cvLatentSvmDetectObjects.
virtual void detect( const Mat& image,
vector<ObjectDetection>& objectDetections,
float overlapThreshold=0.5f,
int numThreads=-1 );
const vector<string>& getClassNames() const;
size_t getClassCount() const;
private:
// Loaded C detectors and their class names.
// NOTE(review): presumably one detector per class name, in matching order - confirm.
vector<CvLatentSvmDetector*> detectors;
vector<string> classNames;
};
// Predicate used to group object candidates detected by Cascade Classifier, HOG etc.
// An instance of the class is to be passed to cv::partition (see cxoperations.hpp).
class CV_EXPORTS SimilarRects
{
public:
    SimilarRects(double _eps) : eps(_eps) {}

    // Two rectangles are similar when their left, top, right and bottom edges
    // each differ by no more than a tolerance, which is eps times the average
    // of the smaller width and the smaller height.
    inline bool operator()(const Rect& r1, const Rect& r2) const
    {
        const double tolerance = eps*(std::min(r1.width, r2.width) + std::min(r1.height, r2.height))*0.5;

        // Edges are checked one at a time, in the order left, top, right, bottom;
        // the first edge that is out of tolerance settles the answer.
        if( !(std::abs(r1.x - r2.x) <= tolerance) )
            return false;
        if( !(std::abs(r1.y - r2.y) <= tolerance) )
            return false;
        if( !(std::abs(r1.x + r1.width - r2.x - r2.width) <= tolerance) )
            return false;
        return std::abs(r1.y + r1.height - r2.y - r2.height) <= tolerance;
    }

    double eps; // relative tolerance supplied at construction
};
// Family of rectangle-grouping functions. In every overload rectList is passed
// by non-const reference (marked CV_IN_OUT in the first two), i.e. it is both
// input and output. eps defaults to 0.2 except in the pointer-taking overload,
// which has no defaults.
// NOTE(review): grouping presumably relies on the SimilarRects predicate, and
// groupThreshold presumably is the minimum cluster size to keep - confirm.

// Groups rectList in place.
CV_EXPORTS void groupRectangles(CV_OUT CV_IN_OUT vector<Rect>& rectList, int groupThreshold, double eps=0.2);
// As above, additionally reporting a vector of int weights.
CV_EXPORTS_W void groupRectangles(CV_OUT CV_IN_OUT vector<Rect>& rectList, CV_OUT vector<int>& weights, int groupThreshold, double eps=0.2);
// Variant taking the weight vectors by pointer.
// NOTE(review): presumably either pointer may be NULL - confirm.
CV_EXPORTS void groupRectangles( vector<Rect>& rectList, int groupThreshold, double eps, vector<int>* weights, vector<double>* levelWeights );
// Variant working with reject levels and level weights.
CV_EXPORTS void groupRectangles(vector<Rect>& rectList, vector<int>& rejectLevels,
vector<double>& levelWeights, int groupThreshold, double eps=0.2);
// Mean-shift based grouping; detectThreshold defaults to 0.0 and winDetSize to 64x128.
CV_EXPORTS void groupRectangles_meanshift(vector<Rect>& rectList, vector<double>& foundWeights, vector<double>& foundScales,
double detectThreshold = 0.0, Size winDetSize = Size(64, 128));
// Base class of the feature evaluators used by CascadeClassifier (which stores
// one in a Ptr<FeatureEvaluator> member). All member functions are virtual
// except the static factory create().
class CV_EXPORTS FeatureEvaluator
{
public:
// Feature type identifiers; see getFeatureType() and create().
enum { HAAR = 0, LBP = 1, HOG = 2 };
virtual ~FeatureEvaluator();
virtual bool read(const FileNode& node);
virtual Ptr<FeatureEvaluator> clone() const;
virtual int getFeatureType() const;
// Selects the image to evaluate features on and, afterwards, the window
// position inside it.
// NOTE(review): the bool results presumably report success - confirm.
virtual bool setImage(const Mat& img, Size origWinSize);
virtual bool setWindow(Point p);
// Evaluate feature number featureIdx; calcOrd returns a double and calcCat an int
// (NOTE(review): presumably for ordered and categorical features respectively - confirm).
virtual double calcOrd(int featureIdx) const;
virtual int calcCat(int featureIdx) const;
// Factory; `type` is presumably one of HAAR, LBP, HOG - confirm.
static Ptr<FeatureEvaluator> create(int type);
};
// Declares an explicit specialization of Ptr<>::delete_obj for
// CvHaarClassifierCascade (CascadeClassifier keeps a Ptr<CvHaarClassifierCascade>).
// NOTE(review): presumably so the cascade is freed through
// cvReleaseHaarClassifierCascade rather than operator delete - confirm.
template<> CV_EXPORTS void Ptr<CvHaarClassifierCascade>::delete_obj();
// C++ names for the cascade detection flags; the values are identical to the
// CV_HAAR_* macros defined in the C section of this header (1, 2, 4, 8).
enum
{
CASCADE_DO_CANNY_PRUNING=1,
CASCADE_SCALE_IMAGE=2,
CASCADE_FIND_BIGGEST_OBJECT=4,
CASCADE_DO_ROUGH_SEARCH=8
};
// Cascade classifier for object detection. It holds both a new-style cascade
// (`data` + `featureEvaluator`) and an old-format one (`oldCascade`, a
// CvHaarClassifierCascade); see isOldFormatCascade().
class CV_EXPORTS_W CascadeClassifier
{
public:
CV_WRAP CascadeClassifier();
// Constructs from a cascade file.
// NOTE(review): presumably forwards to load(filename) - confirm.
CV_WRAP CascadeClassifier( const string& filename );
virtual ~CascadeClassifier();
CV_WRAP virtual bool empty() const;
CV_WRAP bool load( const string& filename );
virtual bool read( const FileNode& node );
// Detects objects in `image` and reports their rectangles through `objects`.
// Defaults: scaleFactor 1.1, minNeighbors 3, flags 0, minSize/maxSize Size().
CV_WRAP virtual void detectMultiScale( const Mat& image,
CV_OUT vector<Rect>& objects,
double scaleFactor=1.1,
int minNeighbors=3, int flags=0,
Size minSize=Size(),
Size maxSize=Size() );
// Overload that additionally takes rejectLevels / levelWeights vectors and an
// outputRejectLevels switch (false by default).
CV_WRAP virtual void detectMultiScale( const Mat& image,
CV_OUT vector<Rect>& objects,
vector<int>& rejectLevels,
vector<double>& levelWeights,
double scaleFactor=1.1,
int minNeighbors=3, int flags=0,
Size minSize=Size(),
Size maxSize=Size(),
bool outputRejectLevels=false );
bool isOldFormatCascade() const;
virtual Size getOriginalWindowSize() const;
int getFeatureType() const;
bool setImage( const Mat& );
protected:
// Earlier signature of detectSingleScale, kept commented out for reference.
//virtual bool detectSingleScale( const Mat& image, int stripCount, Size processingRectSize,
// int stripSize, int yStep, double factor, vector<Rect>& candidates );
virtual bool detectSingleScale( const Mat& image, int stripCount, Size processingRectSize,
int stripSize, int yStep, double factor, vector<Rect>& candidates,
vector<int>& rejectLevels, vector<double>& levelWeights, bool outputRejectLevels=false);
protected:
// NOTE(review): BOOST is presumably the only supported value of Data::stageType - confirm.
enum { BOOST = 0 };
// Same values as the namespace-level CASCADE_* enumeration.
enum { DO_CANNY_PRUNING = 1, SCALE_IMAGE = 2,
FIND_BIGGEST_OBJECT = 4, DO_ROUGH_SEARCH = 8 };
// Implementation helpers defined outside this header that need access to the
// protected members.
friend class CascadeClassifierInvoker;
template<class FEval>
friend int predictOrdered( CascadeClassifier& cascade, Ptr<FeatureEvaluator> &featureEvaluator, double& weight);
template<class FEval>
friend int predictCategorical( CascadeClassifier& cascade, Ptr<FeatureEvaluator> &featureEvaluator, double& weight);
template<class FEval>
friend int predictOrderedStump( CascadeClassifier& cascade, Ptr<FeatureEvaluator> &featureEvaluator, double& weight);
template<class FEval>
friend int predictCategoricalStump( CascadeClassifier& cascade, Ptr<FeatureEvaluator> &featureEvaluator, double& weight);
bool setImage( Ptr<FeatureEvaluator>& feval, const Mat& image);
virtual int runAt( Ptr<FeatureEvaluator>& feval, Point pt, double& weight );
// Storage for a new-format cascade: flat vectors of stages, trees, nodes,
// leaves and subsets.
class Data
{
public:
struct CV_EXPORTS DTreeNode
{
int featureIdx;
float threshold; // for ordered features only
int left;
int right;
};
struct CV_EXPORTS DTree
{
int nodeCount;
};
// NOTE(review): first/ntrees presumably select a range of `classifiers` - confirm.
struct CV_EXPORTS Stage
{
int first;
int ntrees;
float threshold;
};
bool read(const FileNode &node);
bool isStumpBased;
int stageType;
int featureType;
int ncategories;
Size origWinSize;
vector<Stage> stages;
vector<DTree> classifiers;
vector<DTreeNode> nodes;
vector<float> leaves;
vector<int> subsets;
};
Data data;
Ptr<FeatureEvaluator> featureEvaluator;
Ptr<CvHaarClassifierCascade> oldCascade;
public:
// Interface for objects producing a mask from a source image.
// generateMask() is pure virtual; initializeMask() has an empty default body.
class CV_EXPORTS MaskGenerator
{
public:
virtual ~MaskGenerator() {}
virtual cv::Mat generateMask(const cv::Mat& src)=0;
virtual void initializeMask(const cv::Mat& /*src*/) {};
};
void setMaskGenerator(Ptr<MaskGenerator> maskGenerator);
Ptr<MaskGenerator> getMaskGenerator();
void setFaceDetectionMaskGenerator();
protected:
Ptr<MaskGenerator> maskGenerator;
};
//////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
// struct for detection region of interest (ROI)
// Element type of the `locations` vector taken by
// HOGDescriptor::detectMultiScaleROI.
struct DetectionROI
{
// scale(size) of the bounding box
double scale;
// set of requested locations to be evaluated
vector<cv::Point> locations;
// vector that will contain confidence values for each location
vector<double> confidences;
};
// HOG (Histogram-of-Oriented-Gradients) descriptor and object detector.
// All parameters are public members (CV_PROP); svmDetector holds the
// coefficients installed with setSVMDetector().
struct CV_EXPORTS_W HOGDescriptor
{
public:
    enum { L2Hys=0 };
    enum { DEFAULT_NLEVELS=64 };

    // Default parameters: 64x128 window, 16x16 blocks with an 8x8 stride,
    // 8x8 cells, 9 bins, derivAperture 1, winSigma -1, L2Hys normalization with
    // threshold 0.2, gamma correction enabled, DEFAULT_NLEVELS levels.
    CV_WRAP HOGDescriptor() : winSize(64,128), blockSize(16,16), blockStride(8,8),
        cellSize(8,8), nbins(9), derivAperture(1), winSigma(-1),
        histogramNormType(HOGDescriptor::L2Hys), L2HysThreshold(0.2), gammaCorrection(true),
        nlevels(HOGDescriptor::DEFAULT_NLEVELS)
    {}

    // Fully parameterized constructor. Note that _gammaCorrection defaults to
    // false here, whereas the default constructor enables gamma correction.
    CV_WRAP HOGDescriptor(Size _winSize, Size _blockSize, Size _blockStride,
                          Size _cellSize, int _nbins, int _derivAperture=1, double _winSigma=-1,
                          int _histogramNormType=HOGDescriptor::L2Hys,
                          double _L2HysThreshold=0.2, bool _gammaCorrection=false,
                          int _nlevels=HOGDescriptor::DEFAULT_NLEVELS)
        : winSize(_winSize), blockSize(_blockSize), blockStride(_blockStride), cellSize(_cellSize),
        nbins(_nbins), derivAperture(_derivAperture), winSigma(_winSigma),
        histogramNormType(_histogramNormType), L2HysThreshold(_L2HysThreshold),
        gammaCorrection(_gammaCorrection), nlevels(_nlevels)
    {}

    // Constructs a descriptor from a file. The members are first set to the
    // same values as in the default constructor so that the object is in a
    // well-defined state even when load() does not assign them (for example
    // when the file cannot be read); previously the scalar members were left
    // uninitialized in that case.
    CV_WRAP HOGDescriptor(const String& filename)
        : winSize(64,128), blockSize(16,16), blockStride(8,8),
        cellSize(8,8), nbins(9), derivAperture(1), winSigma(-1),
        histogramNormType(HOGDescriptor::L2Hys), L2HysThreshold(0.2), gammaCorrection(true),
        nlevels(HOGDescriptor::DEFAULT_NLEVELS)
    {
        load(filename);
    }

    // Copy constructor, implemented through copyTo().
    HOGDescriptor(const HOGDescriptor& d)
    {
        d.copyTo(*this);
    }

    virtual ~HOGDescriptor() {}

    CV_WRAP size_t getDescriptorSize() const;
    CV_WRAP bool checkDetectorSize() const;
    CV_WRAP double getWinSigma() const;

    CV_WRAP virtual void setSVMDetector(InputArray _svmdetector);

    // Persistence: read/write work on FileNode/FileStorage, load/save on a file
    // name; objname defaults to an empty String.
    virtual bool read(FileNode& fn);
    virtual void write(FileStorage& fs, const String& objname) const;

    CV_WRAP virtual bool load(const String& filename, const String& objname=String());
    CV_WRAP virtual void save(const String& filename, const String& objname=String()) const;
    virtual void copyTo(HOGDescriptor& c) const;

    // Computes descriptors of img into `descriptors`; winStride, padding and
    // locations default to empty values.
    CV_WRAP virtual void compute(const Mat& img,
                                 CV_OUT vector<float>& descriptors,
                                 Size winStride=Size(), Size padding=Size(),
                                 const vector<Point>& locations=vector<Point>()) const;
    //with found weights output
    CV_WRAP virtual void detect(const Mat& img, CV_OUT vector<Point>& foundLocations,
                                CV_OUT vector<double>& weights,
                                double hitThreshold=0, Size winStride=Size(),
                                Size padding=Size(),
                                const vector<Point>& searchLocations=vector<Point>()) const;
    //without found weights output
    virtual void detect(const Mat& img, CV_OUT vector<Point>& foundLocations,
                        double hitThreshold=0, Size winStride=Size(),
                        Size padding=Size(),
                        const vector<Point>& searchLocations=vector<Point>()) const;
    //with result weights output
    CV_WRAP virtual void detectMultiScale(const Mat& img, CV_OUT vector<Rect>& foundLocations,
                                          CV_OUT vector<double>& foundWeights, double hitThreshold=0,
                                          Size winStride=Size(), Size padding=Size(), double scale=1.05,
                                          double finalThreshold=2.0,bool useMeanshiftGrouping = false) const;
    //without found weights output
    virtual void detectMultiScale(const Mat& img, CV_OUT vector<Rect>& foundLocations,
                                  double hitThreshold=0, Size winStride=Size(),
                                  Size padding=Size(), double scale=1.05,
                                  double finalThreshold=2.0, bool useMeanshiftGrouping = false) const;

    CV_WRAP virtual void computeGradient(const Mat& img, CV_OUT Mat& grad, CV_OUT Mat& angleOfs,
                                         Size paddingTL=Size(), Size paddingBR=Size()) const;

    // Built-in detector coefficients, suitable for passing to setSVMDetector().
    CV_WRAP static vector<float> getDefaultPeopleDetector();
    CV_WRAP static vector<float> getDaimlerPeopleDetector();

    CV_PROP Size winSize;
    CV_PROP Size blockSize;
    CV_PROP Size blockStride;
    CV_PROP Size cellSize;
    CV_PROP int nbins;
    CV_PROP int derivAperture;
    CV_PROP double winSigma;
    CV_PROP int histogramNormType;
    CV_PROP double L2HysThreshold;
    CV_PROP bool gammaCorrection;
    CV_PROP vector<float> svmDetector;
    CV_PROP int nlevels;

    // evaluate specified ROI and return confidence value for each location
    void detectROI(const cv::Mat& img, const vector<cv::Point> &locations,
                   CV_OUT std::vector<cv::Point>& foundLocations, CV_OUT std::vector<double>& confidences,
                   double hitThreshold = 0, cv::Size winStride = Size(),
                   cv::Size padding = Size()) const;

    // evaluate specified ROI and return confidence value for each location in multiple scales
    void detectMultiScaleROI(const cv::Mat& img,
                             CV_OUT std::vector<cv::Rect>& foundLocations,
                             std::vector<DetectionROI>& locations,
                             double hitThreshold = 0,
                             int groupThreshold = 0) const;

    // read/parse Dalal's alt model file
    void readALTModel(std::string modelfile);
    // Member overload of rectangle grouping that works with double weights.
    void groupRectangles(vector<cv::Rect>& rectList, vector<double>& weights, int groupThreshold, double eps) const;
};
// Looks for Data Matrix codes in `image` and reports the decoded strings through
// `codes`. `corners` and `dmtx` are optional outputs (both default to noArray()).
// NOTE(review): the layout of `corners` and the content of `dmtx` are not
// visible from this header - confirm in the implementation.
CV_EXPORTS_W void findDataMatrix(InputArray image,
CV_OUT vector<string>& codes,
OutputArray corners=noArray(),
OutputArrayOfArrays dmtx=noArray());
// Draws the given codes onto `image` (an in/out argument).
// NOTE(review): `codes` and `corners` are presumably the outputs of
// findDataMatrix - confirm.
CV_EXPORTS_W void drawDataMatrixCodes(InputOutputArray image,
const vector<string>& codes,
InputArray corners);
}
/****************************************************************************************\
* Datamatrix *
\****************************************************************************************/
// One Data Matrix code as returned by cvFindDataMatrix.
// NOTE(review): msg is a fixed 4-byte buffer; whether it is NUL-terminated, and
// who owns `original` and `corners`, cannot be told from this header - confirm.
struct CV_EXPORTS CvDataMatrixCode {
char msg[4];
CvMat *original;
CvMat *corners;
};
// Finds Data Matrix codes in the image `im` and returns them by value in a
// std::deque (a C++-only API, declared outside the extern "C" section).
CV_EXPORTS std::deque<CvDataMatrixCode> cvFindDataMatrix(CvMat *im);
/****************************************************************************************\
* LINE-MOD *
\****************************************************************************************/
namespace cv {
namespace linemod {
using cv::FileNode;
using cv::FileStorage;
using cv::Mat;
using cv::noArray;
using cv::OutputArrayOfArrays;
using cv::Point;
using cv::Ptr;
using cv::Rect;
using cv::Size;
/// @todo Convert doxy comments to rst
/**
 * \brief Discriminant feature described by its location and label.
 */
struct CV_EXPORTS Feature
{
    int x;     ///< x offset
    int y;     ///< y offset
    int label; ///< Quantization

    /// Creates a feature with zero offsets and a zero label.
    Feature() : x(0), y(0), label(0) {}

    /// Creates a feature from explicit offsets and a quantization label.
    Feature(int _x, int _y, int _label) : x(_x), y(_y), label(_label) {}

    void read(const FileNode& fn);
    void write(FileStorage& fs) const;
};
/**
 * \brief A set of features together with a width, a height and a pyramid level.
 *
 * Filled in by QuantizedPyramid::extractTemplate().
 * NOTE(review): width/height presumably give the extent covered by the
 * features at pyramid_level - confirm.
 */
struct CV_EXPORTS Template
{
int width;
int height;
int pyramid_level;
std::vector<Feature> features;
void read(const FileNode& fn);
void write(FileStorage& fs) const;
};
/**
* \brief Represents a modality operating over an image pyramid.
*/
class QuantizedPyramid
{
public:
// Virtual destructor
virtual ~QuantizedPyramid() {}
/**
* \brief Compute quantized image at current pyramid level for online detection.
*
* \param[out] dst The destination 8-bit image. For each pixel at most one bit is set,
* representing its classification.
*/
virtual void quantize(Mat& dst) const =0;
/**
* \brief Extract most discriminant features at current pyramid level to form a new template.
*
* \param[out] templ The new template.
*/
virtual bool extractTemplate(Template& templ) const =0;
/**
* \brief Go to the next pyramid level.
*
* \todo Allow pyramid scale factor other than 2
*/
virtual void pyrDown() =0;
protected:
/// Candidate feature with a score
struct Candidate
{
Candidate(int x, int y, int label, float score);
/// Sort candidates with high score to the front
bool operator<(const Candidate& rhs) const
{
return score > rhs.score;
}
Feature f;
float score;
};
/**
* \brief Choose candidate features so that they are not bunched together.
*
* \param[in] candidates Candidate features sorted by score.
* \param[out] features Destination vector of selected features.
* \param[in] num_features Number of candidates to select.
* \param[in] distance Hint for desired distance between features.
*/
static void selectScatteredFeatures(const std::vector<Candidate>& candidates,
std::vector<Feature>& features,
size_t num_features, float distance);
};
inline QuantizedPyramid::Candidate::Candidate(int x, int y, int label, float _score) : f(x, y, label), score(_score) {}
/**
* \brief Interface for modalities that plug into the LINE template matching representation.
*
* Concrete modalities in this header are ColorGradient and DepthNormal. Derived
* classes implement name(), read(), write() and the protected processImpl().
*
* \todo Max response, to allow optimization of summing (255/MAX) features as uint8
*/
class CV_EXPORTS Modality
{
public:
// Virtual destructor
virtual ~Modality() {}
/**
* \brief Form a quantized image pyramid from a source image.
*
* Non-virtual wrapper that forwards both arguments to processImpl().
*
* \param[in] src The source image. Type depends on the modality.
* \param[in] mask Optional mask. If not empty, unmasked pixels are set to zero
* in quantized image and cannot be extracted as features.
*/
Ptr<QuantizedPyramid> process(const Mat& src,
const Mat& mask = Mat()) const
{
return processImpl(src, mask);
}
/// Name of the modality (NOTE(review): presumably one of the type strings accepted by create() - confirm).
virtual std::string name() const =0;
/// Read the modality's state from a file node.
virtual void read(const FileNode& fn) =0;
/// Write the modality's state to a file storage.
virtual void write(FileStorage& fs) const =0;
/**
* \brief Create modality by name.
*
* The following modality types are supported:
* - "ColorGradient"
* - "DepthNormal"
*/
static Ptr<Modality> create(const std::string& modality_type);
/**
* \brief Load a modality from file.
*/
static Ptr<Modality> create(const FileNode& fn);
protected:
// Indirection is because process() has a default parameter.
virtual Ptr<QuantizedPyramid> processImpl(const Mat& src,
const Mat& mask) const =0;
};
/**
* \brief Modality that computes quantized gradient orientations from a color image.
*
* The three constructor parameters are kept as public data members of the same names.
*/
class CV_EXPORTS ColorGradient : public Modality
{
public:
/**
* \brief Default constructor. Uses reasonable default parameter values.
*/
ColorGradient();
/**
* \brief Constructor.
*
* \param weak_threshold When quantizing, discard gradients with magnitude less than this.
* \param num_features How many features a template must contain.
* \param strong_threshold Consider as candidate features only gradients whose norms are
* larger than this.
*/
ColorGradient(float weak_threshold, size_t num_features, float strong_threshold);
// Implementations of the Modality interface.
virtual std::string name() const;
virtual void read(const FileNode& fn);
virtual void write(FileStorage& fs) const;
// Parameters; see the constructor documentation for their meaning.
float weak_threshold;
size_t num_features;
float strong_threshold;
protected:
virtual Ptr<QuantizedPyramid> processImpl(const Mat& src,
const Mat& mask) const;
};
/**
* \brief Modality that computes quantized surface normals from a dense depth map.
*
* The four constructor parameters are kept as public data members of the same names.
*/
class CV_EXPORTS DepthNormal : public Modality
{
public:
/**
* \brief Default constructor. Uses reasonable default parameter values.
*/
DepthNormal();
/**
* \brief Constructor.
*
* \param distance_threshold Ignore pixels beyond this distance.
* \param difference_threshold When computing normals, ignore contributions of pixels whose
* depth difference with the central pixel is above this threshold.
* \param num_features How many features a template must contain.
* \param extract_threshold Consider as candidate feature only if there are no differing
* orientations within a distance of extract_threshold.
*/
DepthNormal(int distance_threshold, int difference_threshold, size_t num_features,
int extract_threshold);
// Implementations of the Modality interface.
virtual std::string name() const;
virtual void read(const FileNode& fn);
virtual void write(FileStorage& fs) const;
// Parameters; see the constructor documentation for their meaning.
int distance_threshold;
int difference_threshold;
size_t num_features;
int extract_threshold;
protected:
virtual Ptr<QuantizedPyramid> processImpl(const Mat& src,
const Mat& mask) const;
};
/**
* \brief Debug function to colormap a quantized image for viewing.
*/
void colormap(const Mat& quantized, Mat& dst);
/**
* \brief Represents a successful template match.
*/
struct CV_EXPORTS Match
{
Match()
{
}
Match(int x, int y, float similarity, const std::string& class_id, int template_id);
/// Sort matches with high similarity to the front
bool operator<(const Match& rhs) const
{
// Secondarily sort on template_id for the sake of duplicate removal
if (similarity != rhs.similarity)
return similarity > rhs.similarity;
else
return template_id < rhs.template_id;
}
bool operator==(const Match& rhs) const
{
return x == rhs.x && y == rhs.y && similarity == rhs.similarity && class_id == rhs.class_id;
}
int x;
int y;
float similarity;
std::string class_id;
int template_id;
};
inline Match::Match(int _x, int _y, float _similarity, const std::string& _class_id, int _template_id)
: x(_x), y(_y), similarity(_similarity), class_id(_class_id), template_id(_template_id)
{
}
/**
 * \brief LINE template-matching object detector, usable with any set of modalities.
 */
class CV_EXPORTS Detector
{
public:
  /**
   * \brief Create an empty detector; it is meant to be initialized with read().
   */
  Detector();

  /**
   * \brief Create a detector from a set of modalities and per-level sampling steps.
   *
   * \param modalities Modalities to use (color gradients, depth normals, ...).
   * \param T_pyramid  Sampling step T for each pyramid level; T_pyramid.size() is
   *                   the number of pyramid levels.
   */
  Detector(const std::vector< Ptr<Modality> >& modalities, const std::vector<int>& T_pyramid);

  /**
   * \brief Detect objects by template matching.
   *
   * The search is global at the lowest pyramid level and is then refined locally
   * while stepping up the pyramid.
   *
   * \param      sources          Source images, one per modality.
   * \param      threshold        Similarity threshold, a percentage between 0 and 100.
   * \param[out] matches          Template matches, sorted by similarity score.
   * \param      class_ids        If non-empty, only these object classes are searched.
   * \param[out] quantized_images Optionally receives a vector<Mat> of quantized images.
   * \param      masks            Masks considered during matching. They should be
   *                              CV_8UC1 with 255 marking a valid pixel. If non-empty,
   *                              the vector must have the same size as sources, and
   *                              each element must be either empty or the same size
   *                              as its corresponding source.
   */
  void match(const std::vector<Mat>& sources, float threshold, std::vector<Match>& matches,
             const std::vector<std::string>& class_ids = std::vector<std::string>(),
             OutputArrayOfArrays quantized_images = noArray(),
             const std::vector<Mat>& masks = std::vector<Mat>()) const;

  /**
   * \brief Add a new object template.
   *
   * \param      sources      Source images, one per modality.
   * \param      class_id     Object class ID.
   * \param      object_mask  Mask separating the object from the background.
   * \param[out] bounding_box Optionally receives the bounding box of the extracted
   *                          features.
   *
   * \return Template ID, or -1 if no valid template could be extracted.
   */
  int addTemplate(const std::vector<Mat>& sources, const std::string& class_id,
                  const Mat& object_mask, Rect* bounding_box = NULL);

  /**
   * \brief Add a new object template that was computed by external means.
   */
  int addSyntheticTemplate(const std::vector<Template>& templates, const std::string& class_id);

  /**
   * \brief Access the modalities used by this detector.
   *
   * Adding or removing modalities is not permitted, but they may be dynamic_cast
   * in order to tweak their parameters.
   */
  const std::vector< Ptr<Modality> >& getModalities() const
  {
    return modalities;
  }

  /**
   * \brief Sampling step T at the given pyramid_level.
   *
   * The index is used as-is to subscript T_at_level; no range check is performed.
   */
  int getT(int pyramid_level) const
  {
    return T_at_level[static_cast<size_t>(pyramid_level)];
  }

  /**
   * \brief Number of pyramid levels used by this detector.
   */
  int pyramidLevels() const
  {
    return pyramid_levels;
  }

  /**
   * \brief Get the template pyramid identified by template_id.
   *
   * For example, with 2 modalities (Gradient, Normal) and two pyramid levels
   * (L0, L1), the order is (GradientL0, NormalL0, GradientL1, NormalL1).
   */
  const std::vector<Template>& getTemplates(const std::string& class_id, int template_id) const;

  /// Template count. NOTE(review): presumably summed over all classes -- confirm in the implementation.
  int numTemplates() const;

  /// Template count for the class named class_id.
  int numTemplates(const std::string& class_id) const;

  /// Number of entries in class_templates.
  int numClasses() const
  {
    const size_t class_count = class_templates.size();
    return static_cast<int>(class_count);
  }

  /// List of class IDs. NOTE(review): presumably the keys of class_templates -- confirm.
  std::vector<std::string> classIds() const;

  /// Read detector state from the file node fn.
  void read(const FileNode& fn);

  /// Write detector state to the file storage fs.
  void write(FileStorage& fs) const;

  /// Read one class from fn; class_id_override defaults to the empty string.
  std::string readClass(const FileNode& fn, const std::string &class_id_override = "");

  /// Write the class named class_id to fs.
  void writeClass(const std::string& class_id, FileStorage& fs) const;

  /// Read the listed classes. NOTE(review): format looks like a printf-style file name
  /// pattern with %s standing for the class ID -- confirm in the implementation.
  void readClasses(const std::vector<std::string>& class_ids,
                   const std::string& format = "templates_%s.yml.gz");

  /// Write classes using the same kind of file name pattern as readClasses().
  void writeClasses(const std::string& format = "templates_%s.yml.gz") const;

protected:
  typedef std::vector<Template> TemplatePyramid;
  typedef std::map<std::string, std::vector<TemplatePyramid> > TemplatesMap;
  typedef std::vector<Mat> LinearMemories;
  // Indexed as [pyramid level][modality][quantized label]
  typedef std::vector< std::vector<LinearMemories> > LinearMemoryPyramid;

  std::vector< Ptr<Modality> > modalities;
  int pyramid_levels;
  std::vector<int> T_at_level;
  TemplatesMap class_templates;

  /// Per-class matching helper; takes the linear-memory pyramid, the sizes, the
  /// threshold and the template pyramids of class_id, and fills matches.
  void matchClass(const LinearMemoryPyramid& lm_pyramid,
                  const std::vector<Size>& sizes,
                  float threshold, std::vector<Match>& matches,
                  const std::string& class_id,
                  const std::vector<TemplatePyramid>& template_pyramids) const;
};
/**
 * \brief Factory function for detector using LINE algorithm with color gradients.
 *
 * Default parameter settings suitable for VGA images.
 *
 * \return Ptr to a Detector set up as described above.
 */
CV_EXPORTS Ptr<Detector> getDefaultLINE();
/**
 * \brief Factory function for detector using LINE-MOD algorithm with color gradients
 * and depth normals.
 *
 * Default parameter settings suitable for VGA images.
 *
 * \return Ptr to a Detector set up as described above.
 */
CV_EXPORTS Ptr<Detector> getDefaultLINEMOD();
} // namespace linemod
} // namespace cv
#endif
#endif