Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
classify.cpp
Go to the documentation of this file.
1 
2 // File: classify.cpp
3 // Description: Parameter defaults and state setup/teardown for the classify class.
4 // Author: Samuel Charron
5 //
6 // (C) Copyright 2006, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
18 
19 // Include automatically generated configuration file if running autoconf.
20 #ifdef HAVE_CONFIG_H
21 #include "config_auto.h"
22 #endif
23 
24 #include "classify.h"
25 #include "fontinfo.h"
26 #include "intproto.h"
27 #include "mfoutline.h"
28 #include "scrollview.h"
29 #include "shapetable.h"
30 #include "unicity_table.h"
31 #include <string.h>
32 
33 namespace tesseract {
// Member-initializer list and body of a constructor. The signature line
// (listing line 34) is absent from this extract.
//
// Every *_MEMBER(name, default, description, this->params()) entry supplies a
// default value and a help string for one tunable and hands over
// this->params().
// NOTE(review): presumably the macro registers the tunable in that parameter
// collection -- the macro definitions are not visible here; confirm there.
35  : BOOL_MEMBER(prioritize_division, FALSE,
36  "Prioritize blob division over chopping", this->params()),
// NOTE(review): declared through INT_MEMBER but defaulted with FALSE; confirm
// whether an integer or a boolean parameter is intended.
37  INT_MEMBER(tessedit_single_match, FALSE,
38  "Top choice only from CP", this->params()),
// NOTE(review): this help string is identical to the one given to
// classify_enable_adaptive_matcher further down.
39  BOOL_MEMBER(classify_enable_learning, true,
40  "Enable adaptive classifier", this->params()),
41  INT_MEMBER(classify_debug_level, 0, "Classify debug level",
42  this->params()),
// The default `character` is an identifier rather than a literal.
// NOTE(review): presumably an enumerator declared in an included header.
43  INT_MEMBER(classify_norm_method, character, "Normalization Method ...",
44  this->params()),
45  double_MEMBER(classify_char_norm_range, 0.2,
46  "Character Normalization Range ...", this->params()),
// The trailing /* PREV DEFAULT ... */ remarks on the next four entries record
// the value each default had before it was changed.
47  double_MEMBER(classify_min_norm_scale_x, 0.0, "Min char x-norm scale ...",
48  this->params()), /* PREV DEFAULT 0.1 */
49  double_MEMBER(classify_max_norm_scale_x, 0.325, "Max char x-norm scale ...",
50  this->params()), /* PREV DEFAULT 0.3 */
51  double_MEMBER(classify_min_norm_scale_y, 0.0, "Min char y-norm scale ...",
52  this->params()), /* PREV DEFAULT 0.1 */
53  double_MEMBER(classify_max_norm_scale_y, 0.325, "Max char y-norm scale ...",
54  this->params()), /* PREV DEFAULT 0.3 */
// From here on several BOOL_MEMBER defaults are spelled 0/1, whereas other
// entries in this list use FALSE/true.
55  BOOL_MEMBER(tess_cn_matching, 0, "Character Normalized Matching",
56  this->params()),
57  BOOL_MEMBER(tess_bn_matching, 0, "Baseline Normalized Matching",
58  this->params()),
// NOTE(review): same help string as classify_enable_learning above.
59  BOOL_MEMBER(classify_enable_adaptive_matcher, 1,
60  "Enable adaptive classifier",
61  this->params()),
62  BOOL_MEMBER(classify_use_pre_adapted_templates, 0,
63  "Use pre-adapted classifier templates", this->params()),
64  BOOL_MEMBER(classify_save_adapted_templates, 0,
65  "Save adapted templates to a file", this->params()),
66  BOOL_MEMBER(classify_enable_adaptive_debugger, 0, "Enable match debugger",
67  this->params()),
68  INT_MEMBER(matcher_debug_level, 0, "Matcher Debug Level", this->params()),
69  INT_MEMBER(matcher_debug_flags, 0, "Matcher Debug Flags", this->params()),
70  INT_MEMBER(classify_learning_debug_level, 0, "Learning Debug Level: ",
71  this->params()),
// Matcher thresholds; the help strings give a 0-1 range for each.
72  double_MEMBER(matcher_good_threshold, 0.125, "Good Match (0-1)",
73  this->params()),
74  double_MEMBER(matcher_great_threshold, 0.0, "Great Match (0-1)",
75  this->params()),
76  double_MEMBER(matcher_perfect_threshold, 0.02, "Perfect Match (0-1)",
77  this->params()),
78  double_MEMBER(matcher_bad_match_pad, 0.15, "Bad Match Pad (0-1)",
79  this->params()),
80  double_MEMBER(matcher_rating_margin, 0.1, "New template margin (0-1)",
81  this->params()),
82  double_MEMBER(matcher_avg_noise_size, 12.0, "Avg. noise blob length",
83  this->params()),
84  INT_MEMBER(matcher_permanent_classes_min, 1, "Min # of permanent classes",
85  this->params()),
86  INT_MEMBER(matcher_min_examples_for_prototyping, 3,
87  "Reliable Config Threshold", this->params()),
// NOTE(review): the help string below reads like an on/off switch while the
// parameter is an integer count defaulting to 5; confirm the wording.
88  INT_MEMBER(matcher_sufficient_examples_for_prototyping, 5,
89  "Enable adaption even if the ambiguities have not been seen",
90  this->params()),
91  double_MEMBER(matcher_clustering_max_angle_delta, 0.015,
92  "Maximum angle delta for prototype clustering",
93  this->params()),
94  double_MEMBER(classify_misfit_junk_penalty, 0.0,
95  "Penalty to apply when a non-alnum is vertically out of "
96  "its expected textline position",
97  this->params()),
98  double_MEMBER(rating_scale, 1.5, "Rating scaling factor", this->params()),
99  double_MEMBER(certainty_scale, 20.0, "Certainty scaling factor",
100  this->params()),
// 0.00390625 is exactly 1/256.
101  double_MEMBER(tessedit_class_miss_scale, 0.00390625,
102  "Scale factor for features not used", this->params()),
103  INT_MEMBER(classify_adapt_proto_threshold, 230,
104  "Threshold for good protos during adaptive 0-255",
105  this->params()),
106  INT_MEMBER(classify_adapt_feature_threshold, 230,
107  "Threshold for good features during adaptive 0-255",
108  this->params()),
// NOTE(review): listing line 109 is absent from this extract, so the two
// string literals below have no visible opening macro; restore it from the
// upstream file rather than inferring it.
110  "Do not include character fragments in the"
111  " results of the classifier", this->params()),
112  double_MEMBER(classify_character_fragments_garbage_certainty_threshold,
113  -3.0, "Exclude fragments that do not look like whole"
114  " characters from training and adaption", this->params()),
115  BOOL_MEMBER(classify_debug_character_fragments, FALSE,
116  "Bring up graphical debugging windows for fragments training",
117  this->params()),
118  BOOL_MEMBER(matcher_debug_separate_windows, FALSE,
119  "Use two different windows for debugging the matching: "
120  "One for the protos and one for the features.", this->params()),
121  STRING_MEMBER(classify_learn_debug_str, "", "Class str to debug learning",
122  this->params()),
// Class-pruner tunables.
123  INT_MEMBER(classify_class_pruner_threshold, 229,
124  "Class Pruner Threshold 0-255", this->params()),
125  INT_MEMBER(classify_class_pruner_multiplier, 30,
126  "Class Pruner Multiplier 0-255: ", this->params()),
127  INT_MEMBER(classify_cp_cutoff_strength, 7,
128  "Class Pruner CutoffStrength: ", this->params()),
// NOTE(review): listing line 129 is absent from this extract; the string
// literal below belongs to a macro entry whose opening is not visible.
130  "Integer Matcher Multiplier 0-255: ", this->params()),
// Plain member initializer placed between the parameter macros. In C++ the
// members are initialized in class-declaration order, not in the order
// written in this list.
131  EnableLearning(true),
132  INT_MEMBER(il1_adaption_test, 0, "Dont adapt to i/I at beginning of word",
133  this->params()),
134  BOOL_MEMBER(classify_bln_numeric_mode, 0,
135  "Assume the input is numbers [0-9].", this->params()),
// shape_table_ starts out null; dict_ is constructed from the address of the
// image_ member.
136  shape_table_(NULL),
137  dict_(&image_) {
// NOTE(review): listing lines 138-147 (the first statements of the body) are
// absent from this extract.
// Pointer members are cleared to NULL.
148  AllProtosOn = NULL;
149  PrunedProtos = NULL;
150  AllConfigsOn = NULL;
151  AllProtosOff = NULL;
// NOTE(review): listing lines 152-153 are absent from this extract.
154  NormProtos = NULL;
155 
// All call/attempt counters start at zero.
156  AdaptiveMatcherCalls = 0;
157  BaselineClassifierCalls = 0;
158  CharNormClassifierCalls = 0;
159  AmbigClassifierCalls = 0;
160  NumWordsAdaptedTo = 0;
161  NumCharsAdaptedTo = 0;
162  NumBaselineClassesTried = 0;
163  NumCharNormClassesTried = 0;
164  NumAmbigClassesTried = 0;
165  NumClassesOutput = 0;
166  NumAdaptationsFailed = 0;
167 
// Feature-extraction flags and the three debug-window pointers; the windows
// are not created here, only set to NULL.
168  FeaturesHaveBeenExtracted = false;
169  FeaturesOK = true;
170  learn_debug_win_ = NULL;
171  learn_fragmented_word_debug_win_ = NULL;
172  learn_fragments_debug_win_ = NULL;
173 
// Two heap arrays of MAX_NUM_CLASSES uinT16 entries each; the elements are
// left uninitialized by this new[] form. Both are released with delete[]
// later in this file.
174  CharNormCutoffs = new uinT16[MAX_NUM_CLASSES];
175  BaselineCutoffs = new uinT16[MAX_NUM_CLASSES];
176 }
177 
// Tail of a function body whose opening (listing lines 178-179) is absent
// from this extract.
// NOTE(review): presumably the class destructor -- confirm against upstream.
// Frees the three debug-window pointers and the two cutoff arrays. The
// windows are set to NULL earlier in this file, and delete on a null pointer
// is a no-op, so windows that were never created need no special handling.
180  delete learn_debug_win_;
181  delete learn_fragmented_word_debug_win_;
182  delete learn_fragments_debug_win_;
// Array form of delete, matching the new uinT16[...] allocations above.
183  delete[] CharNormCutoffs;
184  delete[] BaselineCutoffs;
185 }
186 
187 } // namespace tesseract