21 #pragma warning(disable:4244) // Conversion warnings
22 #pragma warning(disable:4305) // int/float warnings
52 #include "config_auto.h"
64 void Tesseract::set_done(
70 if (tessedit_ok_mode == 0) {
77 else if (tessedit_ok_mode == 1) {
81 if (word->
done && (pass == 1) && one_ell_conflict (word,
FALSE))
87 else if (tessedit_ok_mode == 2) {
91 if (word->
done && (pass == 1) && one_ell_conflict (word,
FALSE))
101 if (tessedit_rejection_debug)
102 tprintf (
"\nVETO Tess accepting poor word \"%s\"\n",
111 else if (tessedit_ok_mode == 3) {
115 if (word->
done && (pass == 1) && one_ell_conflict (word,
FALSE))
124 if (tessedit_rejection_debug)
125 tprintf (
"\nVETO Tess accepting poor word \"%s\"\n",
134 else if (tessedit_ok_mode == 4) {
138 if (word->
done && (pass == 1) && one_ell_conflict (word,
FALSE))
147 (test_ambig_word (word)))) {
149 if (tessedit_rejection_debug)
150 tprintf (
"\nVETO Tess accepting poor word \"%s\"\n",
159 else if (tessedit_ok_mode == 5) {
163 if (word->
done && (pass == 1) && one_ell_conflict (word,
FALSE))
171 (test_ambig_word (word)))) {
173 if (tessedit_rejection_debug)
174 tprintf (
"\nVETO Tess accepting poor word \"%s\"\n",
182 tprintf (
"BAD tessedit_ok_mode\n");
195 void Tesseract::make_reject_map(
197 BLOB_CHOICE_LIST_CLIST *blob_choices,
205 check_debug_pt(word, -1);
206 set_done(word, pass);
212 if (tessedit_reject_mode == 0) {
215 }
else if (tessedit_reject_mode == 5) {
224 one_ell_conflict(word,
TRUE);
234 if (rej_use_tess_blanks &&
239 if (rej_use_good_perm) {
240 if ((best_choice->
permuter() == SYSTEM_DAWG_PERM ||
241 best_choice->
permuter() == FREQ_DAWG_PERM ||
242 best_choice->
permuter() == USER_DAWG_PERM) &&
243 (!rej_use_sensible_wd ||
244 acceptable_word_string(*word->
uch_set,
249 }
else if (best_choice->
permuter() == NUMBER_PERM) {
250 if (rej_alphas_in_number_perm) {
251 for (i = 0, offset = 0;
269 tprintf(
"BAD tessedit_reject_mode\n");
273 if (tessedit_image_border > -1)
274 reject_edge_blobs(word);
276 check_debug_pt (word, 10);
277 if (tessedit_rejection_debug) {
279 tprintf(
"Certainty: %f Rating: %f\n",
285 check_debug_pt(word, 20);
321 BLOB_CHOICE_LIST_CLIST *blob_choices) {
326 BLOB_CHOICE_LIST_C_IT list_it = blob_choices;
327 BLOB_CHOICE_IT choice_it;
333 (
"ASSERT FAIL string:\"%s\"; strlen=%d; choices len=%d; blob len=%d\n",
344 for (list_it.mark_cycle_pt ();
345 !list_it.cycled_list (); list_it.forward (), i++,
350 choice_it.set_to_list (list_it.data ());
352 (choice_it.length () == 0))
355 else if (choice_it.data ()->certainty () < threshold)
371 BLOB_CHOICE_LIST_CLIST *blob_choices) {
374 inT16 ok_blob_count = 0;
380 BLOB_CHOICE_LIST_C_IT list_it = blob_choices;
381 BLOB_CHOICE_IT choice_it;
383 blob_count = blob_choices->length ();
384 ratings = (
float *)
alloc_mem (blob_count *
sizeof (
float));
385 for (list_it.mark_cycle_pt (), index = 0;
386 !list_it.cycled_list (); list_it.forward (), index++) {
387 choice_it.set_to_list (list_it.data ());
388 if (choice_it.length () > 0) {
389 ratings[ok_blob_count] = choice_it.data ()->certainty ();
398 qsort (ratings, ok_blob_count,
sizeof (
float),
sort_floats);
401 gapstart = ratings[0] - 1;
402 if (ok_blob_count >= 3) {
403 for (index = 0; index < ok_blob_count - 1; index++) {
404 if (ratings[index + 1] - ratings[index] > bestgap) {
405 bestgap = ratings[index + 1] - ratings[index];
407 gapstart = ratings[index];
411 threshold = gapstart + bestgap / 2;
437 for (
int blobindex = 0; blobindex < blobcount; blobindex++) {
443 word->
reject_map[blobindex].setrej_edge_char();
460 inT16 first_alphanum_index_;
461 inT16 first_alphanum_offset_;
464 BOOL8 non_conflict_set_char;
468 BOOL8 dict_perm_type;
474 word_len = strlen (lengths);
487 for (i = 0, offset = 0, non_conflict_set_char =
FALSE;
488 (i < word_len) && !non_conflict_set_char; offset += lengths[i++])
489 non_conflict_set_char =
493 if (!non_conflict_set_char) {
511 dict_word_ok = (dict_word_type > 0) &&
516 (dict_perm_type && dict_word_ok)) {
519 if (lengths[first_alphanum_index_] == 1 &&
520 word[first_alphanum_offset_] ==
'I') {
526 setrej_1Il_conflict();
535 if (lengths[first_alphanum_index_] == 1 &&
536 word[first_alphanum_offset_] ==
'l') {
542 setrej_1Il_conflict();
566 if (lengths[first_alphanum_index_] == 1 &&
567 word[first_alphanum_offset_] ==
'l') {
574 else if (lengths[first_alphanum_index_] == 1 &&
575 word[first_alphanum_offset_] ==
'I') {
594 for (i = 0, offset = 0; word[offset] !=
'\0';
596 if ((!allow_1s || (word[offset] !=
'1')) &&
599 word_res->
reject_map[i].setrej_1Il_conflict ();
616 setrej_1Il_conflict ();
634 const char *word_lengths) {
638 for (i = 0, offset = 0; word[offset] !=
'\0'; offset += word_lengths[i++]) {
647 const char *word_lengths) {
651 for (i = 0, offset = 0; word[offset] !=
'\0'; offset += word_lengths[i++]) {
660 const char *word_lengths) {
665 for (i = 0, offset = 0; word[offset] !=
'\0'; offset += word_lengths[i++]) {
674 const char *word_lengths) {
678 for (i = 0, offset = 0; word[offset] !=
'\0'; offset += word_lengths[i++]) {
680 (word_lengths[i] != 1 || word[offset] !=
'1'))
713 for (i = 0, offset = 0; i < word_len;
728 for (i = 0, offset = 0; i < word_len;
763 inT16 accepted_char_quality;
780 (char_quality == accepted_char_quality))
789 return dict_word_type == DOC_DAWG_PERM ? 0 : dict_word_type;
799 int prev_right = -9999;
809 bool modified =
false;
810 for (i = 0; i < best_choice->
length() && blob !=
NULL; ++i,
812 out_box = blob->bounding_box();
813 if (blob->next ==
NULL)
816 next_left = blob->next->bounding_box().
left();
819 (out_box.
left() > prev_right) && (out_box.
right() < next_left)) {
820 aspect_ratio = out_box.
width() / (float) out_box.
height();
829 word_res->
reject_map[i].setrej_hyphen_accept();
836 else if (best_choice->
unichar_id(i) == unichar_dash) {
839 word_res->
reject_map[i].setrej_hyphen_accept();
848 prev_right = out_box.
right();
865 for (i = 0; i < best_choice->
length() && blob !=
NULL; ++i,
869 out_box = blob->bounding_box();
877 if (unichar_0 == INVALID_UNICHAR_ID ||
879 unichar_O == INVALID_UNICHAR_ID ||
883 bool modified =
false;
884 for (i = 1; i < best_choice->
length(); ++i) {
885 if (best_choice->
unichar_id(i) == unichar_0 ||
888 if ((i+1) < best_choice->
length() &&
896 (i+1) < best_choice->
length() &&
899 (i+2) < best_choice->
length() &&
909 (((i+1) < best_choice->
length() &&
913 (i == best_choice->
length() - 1))) {
919 (i+1) < best_choice->
length() &&
926 (i+2) < best_choice->
length() &&
939 (i+2) < best_choice->
length() &&
950 (i+1) < best_choice->
length() &&
960 if (best_choice->
unichar_id(i-2) == unichar_O) {
964 while (i < best_choice->length() &&
978 return ch_set.
get_isupper(unichar_id) && !ch_set.
eq(unichar_id,
"O");
982 return ch_set.
get_isdigit(unichar_id) && !ch_set.
eq(unichar_id,
"0");