19 #ifndef TESSERACT_WORDREC_WORDREC_H__
20 #define TESSERACT_WORDREC_WORDREC_H__
44 BLOB_CHOICE_LIST *parent_arg,
49 static int compare(
const void *p1,
const void *p2) {
95 "Merge the fragments in the ratings matrix and delete them "
100 "force associator to run regardless of what enable_assoc is."
101 "This is used for CJK where component grouping is necessary.");
// Declares the integer parameter `wordrec_num_seg_states` via INT_VAR_H.
// Value argument: 30 (presumably the default -- confirm against the
// INT_VAR_H macro definition). Help text: "Segmentation states".
102 INT_VAR_H(wordrec_num_seg_states, 30,
"Segmentation states");
105 "Use information from fragments to guide chopping process");
// Declares the integer parameter `repair_unchopped_blobs` via INT_VAR_H.
// Value argument: 1. The help text suggests it toggles fixing of blobs that
// were not chopped; it is declared as an int, not a bool.
// NOTE(review): whether any non-zero value enables it is not shown here.
106 INT_VAR_H(repair_unchopped_blobs, 1,
"Fix blobs that aren't chopped");
// Declares the integer parameter `chop_min_outline_points` via INT_VAR_H.
// Value argument: 6. Help text: minimum number of points on an outline.
113 INT_VAR_H(chop_min_outline_points, 6,
"Min Number of Points on Outline");
// Declares the integer parameter `chop_inside_angle` via INT_VAR_H.
// Value argument: -50. Help text: minimum inside angle bend.
// NOTE(review): the angle unit is not stated here -- confirm at the use site.
114 INT_VAR_H(chop_inside_angle, -50,
"Min Inside Angle Bend");
// Declares the integer parameter `chop_min_outline_area` via INT_VAR_H.
// Value argument: 2000. Help text: minimum outline area.
// NOTE(review): the area unit is not stated here -- confirm at the use site.
115 INT_VAR_H(chop_min_outline_area, 2000,
"Min Outline Area");
// Declares the integer parameter `segment_adjust_debug` via INT_VAR_H.
// Value argument: 0. Help text: segmentation adjustment debug.
124 INT_VAR_H(segment_adjust_debug, 0,
"Segmentation adjustment debug");
126 "include fixed-pitch heuristics in char segmentation");
128 "use new state cost heuristics for segmentation state evaluation");
130 "base factor for adding segmentation cost into word rating."
131 "It's a multiplying factor, the larger the value above 1, "
132 "the bigger the effect of segmentation cost.");
134 "weight associated with char rating in combined cost of state");
136 "weight associated with width evidence in combined cost of state");
138 "weight associated with seam cut in combined cost of state");
140 "max char width-to-height ratio allowed in segmentation");
// Declares the integer parameter `wordrec_debug_level` via INT_VAR_H.
// Value argument: 0. Help text: debug level for wordrec.
// NOTE(review): the meaning of individual levels is not shown here.
141 INT_VAR_H(wordrec_debug_level, 0,
"Debug level for wordrec");
// Declares the boolean parameter `wordrec_debug_blamer` via BOOL_VAR_H.
// Value argument: false. Help text: print blamer debug messages.
142 BOOL_VAR_H(wordrec_debug_blamer,
false,
"Print blamer debug messages");
// Declares the boolean parameter `wordrec_run_blamer` via BOOL_VAR_H.
// Value argument: false. Help text: try to set the blame for errors.
143 BOOL_VAR_H(wordrec_run_blamer,
false,
"Try to set the blame for errors");
145 "Enable new segmentation search path.");
// Declares the integer parameter `segsearch_debug_level` via INT_VAR_H.
// Value argument: 0. Help text: SegSearch debug level.
146 INT_VAR_H(segsearch_debug_level, 0,
"SegSearch debug level");
// Declares the integer parameter `segsearch_max_pain_points` via INT_VAR_H.
// Value argument: 2000. Help text: maximum number of pain points stored in
// the queue.
147 INT_VAR_H(segsearch_max_pain_points, 2000,
148 "Maximum number of pain points stored in the queue");
// Declares the integer parameter `segsearch_max_futile_classifications` via
// INT_VAR_H. Value argument: 10. Help text: maximum number of pain point
// classifications per word. Elsewhere in this header a counter named
// `num_futile_classifications` is compared against it with `>=`.
149 INT_VAR_H(segsearch_max_futile_classifications, 10,
150 "Maximum number of pain point classifications per word.");
152 "Maximum character width-to-height ratio");
// Declares the floating-point parameter
// `segsearch_max_fixed_pitch_char_wh_ratio` via double_VAR_H.
// Value argument: 2.0. Help text: maximum character width-to-height ratio
// for fixed pitch fonts.
// The help text is built from two adjacent string literals, which the
// compiler concatenates verbatim; the first literal therefore ends with a
// space so the text does not read "forfixed".
153 double_VAR_H(segsearch_max_fixed_pitch_char_wh_ratio, 2.0,
154 "Maximum character width-to-height ratio for "
155 "fixed pitch fonts");
157 "Save alternative paths found during chopping "
158 "and segmentation search");
168 bool ChoiceIsCorrect(
const UNICHARSET& uni_set,
// Declaration only; the definition is not visible in this view.
// Parameters:
//   best_choices - LIST taken by const reference.
//   word         - pointer to a WERD_RES (non-const, so it may be modified).
// Returns nothing.
// NOTE(review): presumably records alternative choices from `best_choices`
// into `word` -- inferred from the name; confirm against the definition.
174 void SaveAltChoices(
const LIST &best_choices,
WERD_RES *word);
178 void FillLattice(
const MATRIX &ratings,
const LIST &best_choices,
183 void CallFillLattice(
const MATRIX &ratings,
const LIST &best_choices,
185 (this->*fill_lattice_)(ratings, best_choices, unicharset, blamer_bundle);
189 void program_editup(
const char *textbase,
190 bool init_classifier,
// Declaration only; the definition is not visible in this view.
// Takes a single inT32 argument and returns nothing.
// NOTE(review): the parameter name `elasped_time` looks like a typo of
// "elapsed_time"; its unit is not stated here.
193 void program_editdown(
inT32 elasped_time);
// Declaration only; the definition is not visible in this view.
// Parameters:
//   denorm - pointer to a const DENORM.
//   blob   - pointer to a TBLOB.
// Returns a pointer to a BLOB_CHOICE_LIST.
// NOTE(review): ownership of the returned list is not shown here -- confirm
// who is responsible for deleting it.
197 BLOB_CHOICE_LIST *call_matcher(
const DENORM* denorm,
TBLOB* blob);
200 BLOB_CHOICE_LIST *classify_blob(
TBLOB *blob,
// Declaration only; the definition is not visible in this view.
// Parameters:
//   class_id  - a UNICHAR_ID.
//   rating    - float rating value.
//   certainty - float certainty value.
// Returns a pointer to a BLOB_CHOICE_LIST.
// NOTE(review): presumably builds a choice list from the given values without
// running a classifier (inferred from the name); ownership of the returned
// list is not shown here -- confirm against the definition.
205 BLOB_CHOICE_LIST *fake_classify_blob(
UNICHAR_ID class_id,
206 float rating,
float certainty);
207 void update_blob_classifications(
TWERD *word,
234 void expand_node(
FLOAT32 worst_priority,
255 const char* expanded_fragment_lengths,
261 BLOB_CHOICE_LIST *join_blobs_and_classify(
317 STATE *output_best_state,
332 void vertical_projection_point(
EDGEPT *split_point,
EDGEPT *target_point,
334 EDGEPT_CLIST *new_points);
338 bool italic_blob,
SEAMS seam_list);
340 bool italic_blob,
SEAMS seam_list);
343 bool italic_blob,
SEAMS seam_list);
344 bool improve_one_blob(
WERD_RES *word_res,
349 bool split_next_to_fragment,
351 void modify_blob_choice(BLOB_CHOICE_LIST *answer,
353 bool chop_one_blob(
TWERD *word,
357 int *right_chop_index);
361 void improve_by_chopping(
WERD_RES *word,
366 bool *updated_best_choice);
367 MATRIX *word_associator(
bool only_create_ratings_matrtix,
374 float rating_ceiling,
375 bool split_next_to_fragment);
// Declaration only; the definition is not visible in this view.
// Takes a pointer to a WERD_RES (non-const) and returns nothing.
// NOTE(review): presumably related to the blamer parameters declared in this
// header (wordrec_run_blamer / wordrec_debug_blamer) -- confirm.
377 void set_chopper_blame(
WERD_RES *word);
// Declaration only; the definition is not visible in this view.
// Takes a SEAM_PILE by value and returns nothing.
// NOTE(review): SEAM_PILE's definition is not shown; if it is a pointer or
// handle typedef, the caller's variable is presumably left dangling after
// this call -- confirm against the definition.
389 void delete_seam_pile(
SEAM_PILE seam_pile);
398 EDGEPT_CLIST *new_points,
409 void set_outline_bounds(
register EDGEPT *point1,
// Declaration only; the definition is not visible in this view.
// Takes a pointer to an EDGEPT (non-const) and returns nothing.
// NOTE(review): presumably reverses the outline in place, starting from the
// given edge point -- inferred from the name and the non-const pointer;
// confirm against the definition.
419 void reverse_outline(
EDGEPT *outline);
422 virtual BLOB_CHOICE_LIST *classify_piece(
TBLOB *pieces,
430 void merge_fragments(
MATRIX *ratings,
440 void get_fragment_lists(
inT16 current_frag,
443 inT16 num_frag_parts,
446 BLOB_CHOICE_LIST *choice_lists);
449 void merge_and_put_fragment_lists(
inT16 row,
451 inT16 num_frag_parts,
452 BLOB_CHOICE_LIST *choice_lists,
460 void fill_filtered_fragment_list(BLOB_CHOICE_LIST *choices,
463 BLOB_CHOICE_LIST *filtered_choices);
464 BLOB_CHOICE_LIST *get_piece_rating(
MATRIX *ratings,
511 const LIST &best_choices,
518 num_futile_classifications >=
519 segsearch_max_futile_classifications);
547 void UpdateSegSearchNodes(
int starting_col,
548 SEG_SEARCH_PENDING_LIST *pending[],
557 void ProcessSegSearchPainPoint(
float pain_point_priority,
560 SEG_SEARCH_PENDING_LIST *pending[],
568 void InitBlamerForSegSearch(
const WERD_CHOICE *best_choice,
575 void FinishBlamerForSegSearch(
const WERD_CHOICE *best_choice,
584 #endif // TESSERACT_WORDREC_WORDREC_H__