Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
cube_control.cpp
Go to the documentation of this file.
1 /******************************************************************
2  * File: cube_control.cpp
3  * Description: Tesseract class methods for invoking cube convolutional
4  * neural network word recognizer.
5  * Author: Raquel Romano
6  * Created: September 2009
7  *
8  **********************************************************************/
9 
10 // Include automatically generated configuration file if running autoconf.
11 #ifdef HAVE_CONFIG_H
12 #include "config_auto.h"
13 #endif
14 
15 #include "allheaders.h"
16 
17 #include "cube_object.h"
18 #include "cube_reco_context.h"
19 #include "tesseractclass.h"
21 
22 namespace tesseract {
23 
/**********************************************************************
 * convert_prob_to_tess_certainty
 *
 * Linearly rescale a probability to a tesseract certainty: 1.0 maps
 * to 0.0 and 0.0 maps to -20.0, so inputs in [0.0, 1.0] land in
 * [-20.0, 0.0]. Inputs outside that range are not clamped.
 **********************************************************************/
static float convert_prob_to_tess_certainty(float prob) {
  // Evaluate in double precision and narrow to float only on return.
  const double shifted = static_cast<double>(prob) - 1.0;
  return static_cast<float>(shifted * 20.0);
}
33 
34 /**********************************************************************
35  * char_box_to_tbox
36  *
37  * Create a TBOX from a character bounding box. If nonzero, the
38  * x_offset accounts for any additional padding of the word box that
39  * should be taken into account.
40  *
41  **********************************************************************/
42 TBOX char_box_to_tbox(Box* char_box, TBOX word_box, int x_offset) {
43  l_int32 left;
44  l_int32 top;
45  l_int32 width;
46  l_int32 height;
47  l_int32 right;
48  l_int32 bottom;
49 
50  boxGetGeometry(char_box, &left, &top, &width, &height);
51  left += word_box.left() - x_offset;
52  right = left + width;
53  top = word_box.bottom() + word_box.height() - top;
54  bottom = top - height;
55  return TBOX(left, bottom, right, top);
56 }
57 
58 /**********************************************************************
59  * extract_cube_state
60  *
61  * Extract CharSamp objects and character bounding boxes from the
62  * CubeObject's state. The caller should free both structres.
63  *
64 **********************************************************************/
66  int* num_chars,
67  Boxa** char_boxes,
68  CharSamp*** char_samples) {
69  if (!cube_obj) {
70  if (cube_debug_level > 0) {
71  tprintf("Cube WARNING (extract_cube_state): Invalid cube object "
72  "passed to extract_cube_state\n");
73  }
74  return false;
75  }
76 
77  // Note that the CubeObject accessors return either the deslanted or
78  // regular objects search object or beam search object, whichever
79  // was used in the last call to Recognize()
80  CubeSearchObject* cube_search_obj = cube_obj->SrchObj();
81  if (!cube_search_obj) {
82  if (cube_debug_level > 0) {
83  tprintf("Cube WARNING (Extract_cube_state): Could not retrieve "
84  "cube's search object in extract_cube_state.\n");
85  }
86  return false;
87  }
88  BeamSearch *beam_search_obj = cube_obj->BeamObj();
89  if (!beam_search_obj) {
90  if (cube_debug_level > 0) {
91  tprintf("Cube WARNING (Extract_cube_state): Could not retrieve "
92  "cube's beam search object in extract_cube_state.\n");
93  }
94  return false;
95  }
96 
97  // Get the character samples and bounding boxes by backtracking
98  // through the beam search path
99  int best_node_index = beam_search_obj->BestPresortedNodeIndex();
100  *char_samples = beam_search_obj->BackTrack(
101  cube_search_obj, best_node_index, num_chars, NULL, char_boxes);
102  if (!*char_samples)
103  return false;
104  return true;
105 }
106 
107 /**********************************************************************
108  * create_cube_box_word
109  *
110  * Fill the given BoxWord with boxes from character bounding
111  * boxes. The char_boxes have local coordinates w.r.t. the
112  * word bounding box, i.e., the left-most character bbox of each word
113  * has (0,0) left-top coord, but the BoxWord must be defined in page
114  * coordinates.
115  **********************************************************************/
116 bool Tesseract::create_cube_box_word(Boxa *char_boxes,
117  int num_chars,
118  TBOX word_box,
119  BoxWord* box_word) {
120  if (!box_word) {
121  if (cube_debug_level > 0) {
122  tprintf("Cube WARNING (create_cube_box_word): Invalid box_word.\n");
123  }
124  return false;
125  }
126 
127  // Find the x-coordinate of left-most char_box, which could be
128  // nonzero if the word image was padded before recognition took place.
129  int x_offset = -1;
130  for (int i = 0; i < num_chars; ++i) {
131  Box* char_box = boxaGetBox(char_boxes, i, L_CLONE);
132  if (x_offset < 0 || char_box->x < x_offset) {
133  x_offset = char_box->x;
134  }
135  boxDestroy(&char_box);
136  }
137 
138  for (int i = 0; i < num_chars; ++i) {
139  Box* char_box = boxaGetBox(char_boxes, i, L_CLONE);
140  TBOX tbox = char_box_to_tbox(char_box, word_box, x_offset);
141  boxDestroy(&char_box);
142  box_word->InsertBox(i, tbox);
143  }
144  return true;
145 }
146 
147 /**********************************************************************
148  * create_werd_choice
149  *
150  **********************************************************************/
151 static WERD_CHOICE *create_werd_choice(
152  CharSamp** char_samples,
153  int num_chars,
154  const char* str,
155  float certainty,
156  const UNICHARSET &unicharset,
157  CharSet* cube_char_set
158  ) {
159  // Insert unichar ids into WERD_CHOICE
160  WERD_CHOICE *werd_choice = new WERD_CHOICE(&unicharset, num_chars);
161  // within a word, cube recognizes the word in reading order.
162  werd_choice->set_unichars_in_script_order(true);
163  ASSERT_HOST(werd_choice != NULL);
164  UNICHAR_ID uch_id;
165  for (int i = 0; i < num_chars; ++i) {
166  uch_id = cube_char_set->UnicharID(char_samples[i]->StrLabel());
167  if (uch_id != INVALID_UNICHAR_ID)
169  uch_id, 1, 0.0, certainty);
170  }
171 
172  BLOB_CHOICE *blob_choice;
173  BLOB_CHOICE_LIST *choices_list;
174  BLOB_CHOICE_IT choices_list_it;
175  BLOB_CHOICE_LIST_CLIST *blob_choices = new BLOB_CHOICE_LIST_CLIST();
176  BLOB_CHOICE_LIST_C_IT blob_choices_it;
177  blob_choices_it.set_to_list(blob_choices);
178 
179  for (int i = 0; i < werd_choice->length(); ++i) {
180  // Create new BLOB_CHOICE_LIST for this unichar
181  choices_list = new BLOB_CHOICE_LIST();
182  choices_list_it.set_to_list(choices_list);
183  // Add a single BLOB_CHOICE to the list
184  blob_choice = new BLOB_CHOICE(werd_choice->unichar_id(i),
185  0.0, certainty, -1, -1, 0, 0, 0, false);
186  choices_list_it.add_after_then_move(blob_choice);
187  // Add list to the clist
188  blob_choices_it.add_to_end(choices_list);
189  }
190  werd_choice->set_certainty(certainty);
191  werd_choice->set_blob_choices(blob_choices);
192  return werd_choice;
193 }
194 
195 /**********************************************************************
196  * init_cube_objects
197  *
198  * Instantiates Tesseract object's CubeRecoContext and TesseractCubeCombiner.
199  * Returns false if cube context could not be created or if load_combiner is
200  * true, but the combiner could not be loaded.
201  **********************************************************************/
202 bool Tesseract::init_cube_objects(bool load_combiner,
203  TessdataManager *tessdata_manager) {
204  ASSERT_HOST(cube_cntxt_ == NULL);
205  ASSERT_HOST(tess_cube_combiner_ == NULL);
206 
207  // Create the cube context object
208  cube_cntxt_ = CubeRecoContext::Create(this, tessdata_manager, &unicharset);
209  if (cube_cntxt_ == NULL) {
210  if (cube_debug_level > 0) {
211  tprintf("Cube WARNING (Tesseract::init_cube_objects()): Failed to "
212  "instantiate CubeRecoContext\n");
213  }
214  return false;
215  }
216 
217  // Create the combiner object and load the combiner net for target languages.
218  if (load_combiner) {
219  tess_cube_combiner_ = new tesseract::TesseractCubeCombiner(cube_cntxt_);
220  if (!tess_cube_combiner_ || !tess_cube_combiner_->LoadCombinerNet()) {
221  delete cube_cntxt_;
222  cube_cntxt_ = NULL;
223  if (tess_cube_combiner_ != NULL) {
224  delete tess_cube_combiner_;
225  tess_cube_combiner_ = NULL;
226  }
227  if (cube_debug_level > 0)
228  tprintf("Cube ERROR (Failed to instantiate TesseractCubeCombiner\n");
229  return false;
230  }
231  }
232  return true;
233 }
234 
235 /**********************************************************************
236  * run_cube_combiner
237  *
238  * Iterates through tesseract's results and calls cube on each word,
239  * combining the results with the existing tesseract result.
240  **********************************************************************/
242  if (page_res == NULL || tess_cube_combiner_ == NULL)
243  return;
244  PAGE_RES_IT page_res_it(page_res);
245  // Iterate through the word results and call cube on each word.
246  for (page_res_it.restart_page(); page_res_it.word () != NULL;
247  page_res_it.forward()) {
248  WERD_RES* word = page_res_it.word();
249  // Skip cube entirely if tesseract's certainty is greater than threshold.
250  int combiner_run_thresh = convert_prob_to_tess_certainty(
251  cube_cntxt_->Params()->CombinerRunThresh());
252  if (word->best_choice->certainty() >= combiner_run_thresh) {
253  continue;
254  }
255  // Use the same language as Tesseract used for the word.
256  Tesseract* lang_tess = word->tesseract;
257 
258  // Setup a trial WERD_RES in which to classify with cube.
259  WERD_RES cube_word;
260  cube_word.InitForRetryRecognition(*word);
261  CubeObject *cube_obj = lang_tess->cube_recognize_word(
262  page_res_it.block()->block, &cube_word);
263  if (cube_obj != NULL)
264  lang_tess->cube_combine_word(cube_obj, &cube_word, word);
265  delete cube_obj;
266  }
267 }
268 
269 /**********************************************************************
270  * cube_word_pass1
271  *
272  * Recognizes a single word using (only) cube. Compatible with
273  * Tesseract's classify_word_pass1/classify_word_pass2.
274  **********************************************************************/
275 void Tesseract::cube_word_pass1(BLOCK* block, ROW *row, WERD_RES *word) {
276  CubeObject *cube_obj = cube_recognize_word(block, word);
277  delete cube_obj;
278 }
279 
280 /**********************************************************************
281  * cube_recognize_word
282  *
283  * Cube recognizer to recognize a single word as with classify_word_pass1
284  * but also returns the cube object in case the combiner is needed.
285  **********************************************************************/
287  if (!cube_binary_ || !cube_cntxt_) {
288  if (cube_debug_level > 0 && !cube_binary_)
289  tprintf("Tesseract::run_cube(): NULL binary image.\n");
290  word->SetupFake(unicharset);
291  return NULL;
292  }
293  TBOX word_box = word->word->bounding_box();
294  if (block != NULL && (block->re_rotation().x() != 1.0f ||
295  block->re_rotation().y() != 0.0f)) {
296  // TODO(rays) We have to rotate the bounding box to get the true coords.
297  // This will be achieved in the future via DENORM.
298  // In the mean time, cube can't process this word.
299  if (cube_debug_level > 0) {
300  tprintf("Cube can't process rotated word at:");
301  word_box.print();
302  }
303  word->SetupFake(unicharset);
304  return NULL;
305  }
306  CubeObject* cube_obj = new tesseract::CubeObject(
307  cube_cntxt_, cube_binary_, word_box.left(),
308  pixGetHeight(cube_binary_) - word_box.top(),
309  word_box.width(), word_box.height());
310  if (!cube_recognize(cube_obj, block, word)) {
311  delete cube_obj;
312  return NULL;
313  }
314  return cube_obj;
315 }
316 
317 /**********************************************************************
318  * cube_combine_word
319  *
320  * Combines the cube and tesseract results for a single word, leaving the
321  * result in tess_word.
322  **********************************************************************/
324  WERD_RES* tess_word) {
325  float combiner_prob = tess_cube_combiner_->CombineResults(tess_word,
326  cube_obj);
327  // If combiner probability is greater than tess/cube combiner
328  // classifier threshold, i.e. tesseract wins, then just return the
329  // tesseract result unchanged, as the combiner knows nothing about how
330  // correct the answer is. If cube and tesseract agree, then improve the
331  // scores before returning.
332  WERD_CHOICE* tess_best = tess_word->best_choice;
333  WERD_CHOICE* cube_best = cube_word->best_choice;
335  tprintf("Combiner prob = %g vs threshold %g\n",
336  combiner_prob, cube_cntxt_->Params()->CombinerClassifierThresh());
337  }
338  if (combiner_prob >=
339  cube_cntxt_->Params()->CombinerClassifierThresh()) {
340  if (tess_best->unichar_string() == cube_best->unichar_string()) {
341  // Cube and tess agree, so improve the scores.
342  tess_best->set_rating(tess_best->rating() / 2);
343  tess_best->set_certainty(tess_best->certainty() / 2);
344  }
345  return;
346  }
347  // Cube wins.
348  // It is better for the language combiner to have all tesseract scores,
349  // so put them in the cube result.
350  cube_best->set_rating(tess_best->rating());
351  cube_best->set_certainty(tess_best->certainty());
353  tprintf("Cube INFO: tesseract result replaced by cube: %s -> %s\n",
354  tess_best->unichar_string().string(),
355  cube_best->unichar_string().string());
356  }
357  tess_word->ConsumeWordResults(cube_word);
358 }
359 
360 /**********************************************************************
361  * cube_recognize
362  *
363  * Call cube on the current word, and write the result to word.
364  * Sets up a fake result and returns false if something goes wrong.
365  **********************************************************************/
367  WERD_RES *word) {
368  if (!word->SetupForCubeRecognition(unicharset, this, block)) {
369  return false; // Graphics block.
370  }
371 
372  // Run cube
373  WordAltList *cube_alt_list = cube_obj->RecognizeWord();
374  if (!cube_alt_list || cube_alt_list->AltCount() <= 0) {
375  if (cube_debug_level > 0) {
376  tprintf("Cube returned nothing for word at:");
377  word->word->bounding_box().print();
378  }
379  word->SetupFake(unicharset);
380  return false;
381  }
382 
383  // Get cube's best result and its probability, mapped to tesseract's
384  // certainty range
385  char_32 *cube_best_32 = cube_alt_list->Alt(0);
386  double cube_prob = CubeUtils::Cost2Prob(cube_alt_list->AltCost(0));
387  float cube_certainty = convert_prob_to_tess_certainty(cube_prob);
388  string cube_best_str;
389  CubeUtils::UTF32ToUTF8(cube_best_32, &cube_best_str);
390 
391  // Retrieve Cube's character bounding boxes and CharSamples,
392  // corresponding to the most recent call to RecognizeWord().
393  Boxa *char_boxes = NULL;
394  CharSamp **char_samples = NULL;;
395  int num_chars;
396  if (!extract_cube_state(cube_obj, &num_chars, &char_boxes, &char_samples)
397  && cube_debug_level > 0) {
398  tprintf("Cube WARNING (Tesseract::cube_recognize): Cannot extract "
399  "cube state.\n");
400  word->SetupFake(unicharset);
401  return false;
402  }
403 
404  // Convert cube's character bounding boxes to a BoxWord.
405  BoxWord cube_box_word;
406  TBOX tess_word_box = word->word->bounding_box();
407  if (word->denorm.block() != NULL)
408  tess_word_box.rotate(word->denorm.block()->re_rotation());
409  bool box_word_success = create_cube_box_word(char_boxes, num_chars,
410  tess_word_box,
411  &cube_box_word);
412  boxaDestroy(&char_boxes);
413  if (!box_word_success) {
414  if (cube_debug_level > 0) {
415  tprintf("Cube WARNING (Tesseract::cube_recognize): Could not "
416  "create cube BoxWord\n");
417  }
418  word->SetupFake(unicharset);
419  return false;
420  }
421 
422  // Create cube's best choice.
423  WERD_CHOICE* cube_werd_choice = create_werd_choice(
424  char_samples, num_chars, cube_best_str.c_str(), cube_certainty,
425  unicharset, cube_cntxt_->CharacterSet());
426  delete []char_samples;
427 
428  if (!cube_werd_choice) {
429  if (cube_debug_level > 0) {
430  tprintf("Cube WARNING (Tesseract::cube_recognize): Could not "
431  "create cube WERD_CHOICE\n");
432  }
433  word->SetupFake(unicharset);
434  return false;
435  }
437  tprintf("Cube result: %s r=%g, c=%g\n",
438  cube_werd_choice->unichar_string().string(),
439  cube_werd_choice->rating(),
440  cube_werd_choice->certainty());
441  }
442 
443  // Fill tesseract result's fields with cube results
444  fill_werd_res(cube_box_word, cube_werd_choice, cube_best_str.c_str(), word);
445  return true;
446 }
447 
448 /**********************************************************************
449  * fill_werd_res
450  *
451  * Fill Tesseract's word result fields with cube's.
452  *
453  **********************************************************************/
454 void Tesseract::fill_werd_res(const BoxWord& cube_box_word,
455  WERD_CHOICE* cube_werd_choice,
456  const char* cube_best_str,
457  WERD_RES* tess_werd_res) {
458  // Replace tesseract results's best choice with cube's
459  tess_werd_res->best_choice = cube_werd_choice;
460  tess_werd_res->raw_choice = new WERD_CHOICE(*cube_werd_choice);
461 
462  delete tess_werd_res->box_word;
463  tess_werd_res->box_word = new BoxWord(cube_box_word);
464  tess_werd_res->box_word->ClipToOriginalWord(tess_werd_res->denorm.block(),
465  tess_werd_res->word);
466  // Fill text and remaining fields
467  tess_werd_res->word->set_text(cube_best_str);
468  tess_werd_res->tess_failed = FALSE;
469  tess_werd_res->tess_accepted =
470  tess_acceptable_word(tess_werd_res->best_choice,
471  tess_werd_res->raw_choice);
472  // There is no output word, so we can' call AdaptableWord, but then I don't
473  // think we need to. Fudge the result with accepted.
474  tess_werd_res->tess_would_adapt = tess_werd_res->tess_accepted;
475 
476  // Initialize the reject_map and set it to done, i.e., ignore all of
477  // tesseract's tests for rejection
478  tess_werd_res->reject_map.initialise(cube_werd_choice->length());
479  tess_werd_res->done = tess_werd_res->tess_accepted;
480 
481  // Some sanity checks
482  ASSERT_HOST(tess_werd_res->best_choice->length() ==
483  tess_werd_res->best_choice->blob_choices()->length());
484  ASSERT_HOST(tess_werd_res->best_choice->length() ==
485  tess_werd_res->reject_map.length());
486 }
487 
488 } // namespace tesseract