Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
colfind.h
Go to the documentation of this file.
1 
2 // File: colfind.h
3 // Description: Class to find columns in the grid of BLOBNBOXes.
4 // Author: Ray Smith
5 // Created: Thu Feb 21 14:04:01 PST 2008
6 //
7 // (C) Copyright 2008, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #ifndef TESSERACT_TEXTORD_COLFIND_H__
21 #define TESSERACT_TEXTORD_COLFIND_H__
22 
23 #include "tabfind.h"
24 #include "imagefind.h"
25 #include "colpartitiongrid.h"
26 #include "colpartitionset.h"
27 #include "ocrblock.h"
28 #include "textlineprojection.h"
29 // Forward declarations only; none of these types is defined in this header.
30 class BLOCK_LIST;
31 struct Boxa;
32 struct Pixa;
33 class DENORM;
34 class ScrollView;
35 class STATS;
36 class TO_BLOCK;
37 
38 namespace tesseract {
39 // Declares textord_tabfind_find_tables through the BOOL_VAR_H macro (not defined in this header). NOTE(review): presumably a boolean parameter defaulting to false - confirm against the macro definition.
40 extern BOOL_VAR_H(textord_tabfind_find_tables, false, "run table detection");
41 // Forward declarations of tesseract-namespace types; no definitions appear in this header.
42 class ColPartitionSet;
43 class ColPartitionSet_LIST;
44 class ColSegment_LIST;
45 class ColumnGroup_LIST;
46 class LineSpacing;
47 class StrokeWidth;
48 class TempColumn_LIST;
49 class EquationDetectBase;
50 
51 // The ColumnFinder class finds columns in the grid. It derives from TabFind and must be driven through the mandatory call sequence listed below.
52 class ColumnFinder : public TabFind {
53  public:
54  // Gridsize is an estimate of the text size in the image. A suitable value
55  // is in TO_BLOCK::line_size after find_components has been used to make
56  // the blobs.
57  // bleft and tright are the bounds of the image (rectangle) being processed.
58  // vlines is a (possibly empty) list of TabVector and vertical_x and y are
59  // the sum logical vertical vector produced by LineFinder::FindVerticalLines. NOTE(review): resolution and hlines are not described here - presumably the image resolution and the horizontal-line TabVectors; confirm against the implementation.
60  ColumnFinder(int gridsize, const ICOORD& bleft, const ICOORD& tright,
61  int resolution, TabVector_LIST* vlines, TabVector_LIST* hlines,
62  int vertical_x, int vertical_y);
63  virtual ~ColumnFinder();
64 
65  // Accessors for testing. Both return pointers to state held by this object.
66  const DENORM* denorm() const {
67  return denorm_;
68  }
69  const TextlineProjection* projection() const {
70  return &projection_;
71  }
72 
73  // ======================================================================
74  // The main function of ColumnFinder is broken into pieces to facilitate
75  // optional insertion of orientation and script detection in an efficient
76  // way. The calling sequence IS MANDATORY however, whether or not
77  // OSD is being used:
78  // 1. Construction.
79  // 2. SetupAndFilterNoise.
80  // 3. IsVerticallyAlignedText.
81  // 4. CorrectOrientation.
82  // 5. FindBlocks.
83  // 6. Destruction. Use of a single column finder for multiple images does not
84  // make sense.
85  // Throughout these steps, the ColPartitions are owned by part_grid_, which
86  // means that it must be kept correct. Exception: big_parts_ owns its
87  // own ColPartitions.
88  // The BLOBNBOXes are owned by the input TO_BLOCK for the whole time, except
89  // for a phase in FindBlocks before TransformToBlocks, when they become
90  // owned by the ColPartitions. The owner() ColPartition of a BLOBNBOX
91  // indicates more of a betrothal for the majority of layout analysis, ie
92  // which ColPartition will take ownership when the blobs are released from
93  // the input TO_BLOCK. Exception: image_bblobs_ owns the fake blobs that
94  // are part of the image regions, as they are not on any TO_BLOCK list.
95  // TODO(rays) break up column finder further into smaller classes, as
96  // there is a lot more to it than column finding now.
97  // ======================================================================
98 
99  // Performs initial processing on the blobs in the input_block:
100  // Setup the part_grid, stroke_width_, nontext_map_.
101  // Obvious noise blobs are filtered out and used to mark the nontext_map_.
102  // Initial stroke-width analysis is used to get local text alignment
103  // direction, so the textline projection_ map can be setup.
104  // On return, IsVerticallyAlignedText may be called (now optionally) to
105  // determine the gross textline alignment of the page.
106  void SetupAndFilterNoise(Pix* photo_mask_pix, TO_BLOCK* input_block);
107 
108  // Tests for vertical alignment of text (returning true if so), and generates
109  // a list of blobs (in osd_blobs) for orientation and script detection.
110  // block is the single block for the whole page or rectangle to be OCRed.
111  // Note that the vertical alignment may be due to text whose writing direction
112  // is vertical, like say Japanese, or due to text whose writing direction is
113  // horizontal but whose text appears vertically aligned because the image is
114  // not the right way up.
115  bool IsVerticallyAlignedText(TO_BLOCK* block, BLOBNBOX_CLIST* osd_blobs);
116 
117  // Rotates the blobs and the TabVectors so that the gross writing direction
118  // (text lines) are horizontal and lines are read down the page.
119  // Applied rotation stored in rotation_.
120  // A second rotation is calculated for application during recognition to
121  // make the rotated blobs upright for recognition.
122  // Subsequent rotation stored in text_rotation_.
123  //
124  // Arguments:
125  // vertical_text_lines is true if the text lines are vertical.
126  // recognition_rotation [0..3] is the number of anti-clockwise 90 degree
127  // rotations from osd required for the text to be upright and readable.
128  void CorrectOrientation(TO_BLOCK* block, bool vertical_text_lines,
129  int recognition_rotation);
130 
131  // Finds blocks of text, image, rule line, table etc, returning them in the
132  // blocks and to_blocks.
133  // (Each TO_BLOCK points to the basic BLOCK and adds more information.)
134  // Image blocks are generated by a combination of photo_mask_pix (which may
135  // NOT be NULL) and the rejected text found during preliminary textline
136  // finding.
137  // The input_block is the result of a call to find_components, and contains
138  // the blobs found in the image or rectangle to be OCRed. These blobs will be
139  // removed and placed in the output blocks, while unused ones will be deleted.
140  // If single_column is true, the input is treated as single column, but
141  // it is still divided into blocks of equal line spacing/text size.
142  // scaled_color is scaled down by scaled_factor from the input color image,
143  // and may be NULL if the input was not color.
144  // Returns -1 if the user hits the 'd' key in the blocks window while running
145  // in debug mode, which requests a retry with more debug info.
146  int FindBlocks(bool single_column,
147  Pix* scaled_color, int scaled_factor,
148  TO_BLOCK* block, Pix* photo_mask_pix,
149  BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks);
150 
151  // Get the rotation required to deskew, and its inverse rotation.
152  void GetDeskewVectors(FCOORD* deskew, FCOORD* reskew);
153 
154  // Set the equation detection pointer.
156  // NOTE(review): the declaration described by the comment above is absent from this listing; the equation_detect_ member comment names it SetEquationDetect. Restore it from the original header.
157  private:
158  // Displays the blob and block bounding boxes in a window called Blocks.
159  void DisplayBlocks(BLOCK_LIST* blocks);
160  // Displays the column edges at each grid y coordinate defined by
161  // best_columns_.
162  void DisplayColumnBounds(PartSetVector* sets);
163 
165 
166  // Sets up column_sets_ (the determined column layout at each horizontal
167  // slice). Returns false if the page is empty.
168  bool MakeColumns(bool single_column);
169  // Attempt to improve the column_candidates by expanding the columns
170  // and adding new partitions from the partition sets in src_sets.
171  // Src_sets may be equal to column_candidates, in which case it will
172  // use them as a source to improve themselves.
173  void ImproveColumnCandidates(PartSetVector* src_sets,
174  PartSetVector* column_sets);
175  // Prints debug information on the column candidates.
176  void PrintColumnCandidates(const char* title);
177  // Finds the optimal set of columns that cover the entire image with as
178  // few changes in column partition as possible.
179  void AssignColumns(const PartSetVector& part_sets);
180  // Finds the biggest range in part_sets_ that has no assigned column, but
181  // column assignment is possible.
182  bool BiggestUnassignedRange(int set_count, const bool* any_columns_possible,
183  int* start, int* end);
184  // Finds the modal compatible column_set_ index within the given range.
185  int RangeModalColumnSet(int** column_set_costs, const int* assigned_costs,
186  int start, int end);
187  // Given that there are many column_set_id compatible columns in the range,
188  // shrinks the range to the longest contiguous run of compatibility, allowing
189  // gaps where no columns are possible, but not where competing columns are
190  // possible.
191  void ShrinkRangeToLongestRun(int** column_set_costs,
192  const int* assigned_costs,
193  const bool* any_columns_possible,
194  int column_set_id,
195  int* best_start, int* best_end);
196  // Moves start in the direction of step, up to, but not including end while
197  // the only incompatible regions are no more than kMaxIncompatibleColumnCount
198  // in size, and the compatible regions beyond are bigger.
199  void ExtendRangePastSmallGaps(int** column_set_costs,
200  const int* assigned_costs,
201  const bool* any_columns_possible,
202  int column_set_id,
203  int step, int end, int* start);
204  // Assigns the given column_set_id to the part_sets_ in the given range.
205  void AssignColumnToRange(int column_set_id, int start, int end,
206  int** column_set_costs, int* assigned_costs);
207 
208  // Computes the mean_column_gap_.
209  void ComputeMeanColumnGap();
210 
213 
214  // Hoovers up all un-owned blobs and deletes them.
215  // The rest get released from the block so the ColPartitions can pass
216  // ownership to the output blocks.
217  void ReleaseBlobsAndCleanupUnused(TO_BLOCK* block);
218  // Splits partitions that cross columns where they have nothing in the gap.
219  void GridSplitPartitions();
220  // Merges partitions where there is vertical overlap, within a single column,
221  // and the horizontal gap is small enough.
222  void GridMergePartitions();
223  // Inserts remaining noise blobs into the most applicable partition if any.
224  // If there is no applicable partition, then the blobs are deleted.
225  void InsertRemainingNoise(TO_BLOCK* block);
226  // Remove partitions that come from horizontal lines that look like
227  // underlines, but are not part of a table.
228  void GridRemoveUnderlinePartitions();
229  // Add horizontal line separators as partitions.
230  void GridInsertHLinePartitions();
231  // Add vertical line separators as partitions.
232  void GridInsertVLinePartitions();
233  // For every ColPartition in the grid, sets its type based on position
234  // in the columns.
235  void SetPartitionTypes();
236  // Only images remain with multiple types in a run of partners.
237  // Sets the type of all in the group to the maximum of the group.
238  void SmoothPartnerRuns();
239 
241 
242  // Helper functions for TransformToBlocks.
243  // Add the part to the temp list in the correct order.
244  void AddToTempPartList(ColPartition* part, ColPartition_CLIST* temp_list);
245  // Add everything from the temp list to the work_set assuming correct order.
246  void EmptyTempPartList(ColPartition_CLIST* temp_list,
247  WorkingPartSet_LIST* work_set);
248 
249  // Transform the grid of partitions to the output blocks.
250  void TransformToBlocks(BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks);
251 
252  // Reflect the blob boxes (but not the outlines) in the y-axis so that
253  // the blocks get created in the correct RTL order. Rotates the blobs
254  // in the input_block and the bblobs list.
255  // The reflection is undone in RotateAndReskewBlocks by
256  // reflecting the blocks themselves, and then recomputing the blob bounding
257  // boxes.
258  void ReflectForRtl(TO_BLOCK* input_block, BLOBNBOX_LIST* bblobs);
259 
260  // Undo the deskew that was done in FindTabVectors, as recognition is done
261  // without correcting blobs or blob outlines for skew.
262  // Reskew the completed blocks to put them back to the original rotated coords
263  // that were created by CorrectOrientation.
264  // If the input_is_rtl, then reflect the blocks in the y-axis to undo the
265  // reflection that was done before FindTabVectors.
266  // Blocks that were identified as vertical text (relative to the rotated
267  // coordinates) are further rotated so the text lines are horizontal.
268  // blob polygonal outlines are rotated to match the position of the blocks
269  // that they are in, and their bounding boxes are recalculated to be accurate.
270  // Record appropriate inverse transformations and required
271  // classifier transformation in the blocks.
272  void RotateAndReskewBlocks(bool input_is_rtl, TO_BLOCK_LIST* to_blocks);
273 
274  // Computes the rotations for the block (to make textlines horizontal) and
275  // for the blobs (for classification) and sets the appropriate members
276  // of the given block.
277  // Returns the rotation that needs to be applied to the blobs to make
278  // them sit in the rotated block.
279  FCOORD ComputeBlockAndClassifyRotation(BLOCK* block);
280 
281  // The minimum gutter width to apply for finding columns.
282  // Modified when vertical text is detected to prevent detection of
283  // vertical text lines as columns.
284  int min_gutter_width_;
285  // The mean gap between columns over the page.
286  int mean_column_gap_;
287  // The rotation vector needed to convert original coords to deskewed.
288  FCOORD deskew_;
289  // The rotation vector needed to convert deskewed back to original coords.
290  FCOORD reskew_;
291  // The rotation vector used to rotate vertically oriented pages.
292  FCOORD rotation_;
293  // The rotation vector needed to convert the rotated back to original coords.
294  FCOORD rerotate_;
295  // The additional rotation vector needed to rotate text for recognition.
296  FCOORD text_rotation_;
297  // The column_sets_ contain the ordered candidate ColPartitionSets that
298  // define the possible divisions of the page into columns.
299  PartSetVector column_sets_;
300  // A simple array of pointers to the best assigned column division at
301  // each grid y coordinate.
302  ColPartitionSet** best_columns_;
303  // The grid used for creating initial partitions with strokewidth.
304  StrokeWidth* stroke_width_;
305  // The grid used to hold ColPartitions after the columns have been determined.
306  ColPartitionGrid part_grid_;
307  // List of ColPartitions that are no longer needed after they have been
308  // turned into regions, but are kept around because they are referenced
309  // by the part_grid_.
310  ColPartition_LIST good_parts_;
311  // List of ColPartitions that are big and might be dropcap or vertically
312  // joined.
313  ColPartition_LIST big_parts_;
314  // List of ColPartitions that have been declared noise.
315  ColPartition_LIST noise_parts_;
316  // The fake blobs that are made from the images.
317  BLOBNBOX_LIST image_bblobs_;
318  // Horizontal line separators.
319  TabVector_LIST horizontal_lines_;
320  // Image map of photo/noise areas on the page.
321  Pix* nontext_map_;
322  // Textline projection map.
323  TextlineProjection projection_;
324  // Sequence of DENORMS that indicate how to get back to the original image
325  // coordinate space. The destructor must delete all the DENORMs in the chain.
326  DENORM* denorm_;
327 
328  // Various debug windows that automatically go away on completion.
329  ScrollView* input_blobs_win_;
330 
331  // The equation region detector pointer. Note: This pointer is passed in by
332  // member function SetEquationDetect; this class does NOT own it and must
333  // not release it.
334  EquationDetectBase* equation_detect_;
335 
336  // Allow a subsequent instance to reuse the blocks window.
337  // Not thread-safe, but multiple threads shouldn't be using windows anyway.
338  static ScrollView* blocks_win_;
339 };
340 
341 } // namespace tesseract.
342 
343 #endif // TESSERACT_TEXTORD_COLFIND_H__