Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
normalis.h
Go to the documentation of this file.
1 /**********************************************************************
2  * File: normalis.h (Formerly denorm.h)
3  * Description: Code for the DENORM class.
4  * Author: Ray Smith
5  * Created: Thu Apr 23 09:22:43 BST 1992
6  *
7  * (C) Copyright 1992, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #ifndef NORMALIS_H
21 #define NORMALIS_H
22 
23 #include <stdio.h>
24 #include "host.h"
25 
// Dimensions of the baseline-normalized (BLN) coordinate system. In the
// SetupNormalization usage examples later in this header, kBlnXHeight / x_height
// is passed as the scale and kBlnBaselineOffset as final_yshift.
26 const int kBlnCellHeight = 256; // Full height of the cell for baseline normalization.
27 const int kBlnXHeight = 128; // Target x-height for baseline normalization.
28 const int kBlnBaselineOffset = 64; // y value the baseline origin is shifted to (final_yshift in the examples).
29 
// Forward declarations. This header only refers to these types through
// pointers and references, so their full definitions are not required here.
30 struct Pix;
31 class ROW; // Forward declaration.
32 class BLOCK;
33 class FCOORD;
34 class TBLOB;
35 class TBOX;
36 class TPOINT;
37 class UNICHARSET;
38 
39 namespace tesseract {
40 // Possible normalization methods. Use NEGATIVE values as these also
41 // double up as markers for the last sub-classifier.
42 enum NormalizationMode {
43  NM_BASELINE = -3, // The original BL (baseline) normalization mode.
44  NM_CHAR_ISOTROPIC = -2, // Character normalization but isotropic.
45  NM_CHAR_ANISOTROPIC = -1 // The original CN (character) normalization mode.
46 };
47 
48 } // namespace tesseract.
49 
// One segment of the piecewise description that a DENORM can use instead of a
// single baseline: per the DENORM documentation in this header, segments
// provide the y_origin and the y_scale at a given source x.
50 class DENORM_SEG {
51  public:
// NOTE(review): listing line 52 is absent here (the numbering jumps from 51
// to 53), so a member declared on that line may be missing from this view --
// confirm against the original normalis.h.
53 
54  inT32 xstart; // start of segment (x coordinate)
55  inT32 ycoord; // y at segment; presumably the local y origin -- confirm in normalis.cpp
56  float scale_factor; // normalized_x/scale_factor + x_center == original_x
57 };
58 
// A DENORM holds the parameters of a single normalization step and can run it
// forwards (LocalNormTransform) or backwards (LocalDenormTransform). Steps can
// be chained through predecessor_, in which case NormTransform/DenormTransform
// run the whole chain. The block, row and predecessor pointers are borrowed;
// the segment array is owned by the DENORM.
59 class DENORM {
60  public:
61  DENORM();
62  DENORM(float x, float scaling, ROW *src);
63  DENORM(float x, // from same pieces
64  float scaling,
65  double line_m, // default line: y = mx + c
66  double line_c,
67  inT16 seg_count, // no of segments
68  DENORM_SEG *seg_pts, // actual segments
69  BOOL8 using_row, // as baseline
70  ROW *src);
71  // Copying a DENORM is allowed.
72  DENORM(const DENORM &);
73  DENORM& operator=(const DENORM&);
74  ~DENORM();
75 
76  // Setup for a baseline normalization. If there are segs, then they
77  // are used, otherwise, if there is a row, that is used, otherwise the
78  // bottom of the word_box is used for the baseline.
79  void SetupBLNormalize(const BLOCK* block, const ROW* row, float x_height,
80  const TBOX& word_box,
81  int num_segs, const DENORM_SEG* segs);
82 
83  // Setup the normalization transformation parameters.
84  // The normalizations applied to a blob are as follows:
85  // 1. An optional block layout rotation that was applied during layout
86  // analysis to make the textlines horizontal.
87  // 2. A normalization transformation (LocalNormTransform):
88  // Subtract the "origin"
89  // Apply an x,y scaling.
90  // Apply an optional rotation.
91  // Add back a final translation.
92  // The origin is in the block-rotated space, and is usually something like
93  // the x-middle of the word at the baseline.
94  // 3. Zero or more further normalization transformations that are applied
95  // in sequence, with a similar pattern to the first normalization transform.
96  //
97  // A DENORM holds the parameters of a single normalization, and can execute
98  // both the LocalNormTransform (a forwards normalization), and the
99  // LocalDenormTransform which is an inverse transform or de-normalization.
100  // A DENORM may point to a predecessor DENORM, which is actually the earlier
101  // normalization, so the full normalization sequence involves executing all
102  // predecessors first and then the transform in "this".
103  // Let x be image co-ordinates and suppose we have normalization classes A, B, C
104  // where we first apply A then B then C to get normalized x':
105  // x' = CBAx
106  // Then the backwards (to original coordinates) would be:
107  // x = A^-1 B^-1 C^-1 x'
108  // and A = B->predecessor_ and B = C->predecessor_
109  // NormTransform executes all predecessors recursively, and then this.
110  // NormTransform would be used to transform an image-based feature to
111  // normalized space for use in a classifier
112  // DenormTransform inverts this and then all predecessors. It can be
113  // used to get back to the original image coordinates from normalized space.
114  // The LocalNormTransform member executes just the transformation
115  // in "this" without the layout rotation or any predecessors. It would be
116  // used to run each successive normalization, eg the word normalization,
117  // and later the character normalization.
118 
119  // Arguments:
120  // block: if not NULL, then this is the first transformation, and
121  // block->re_rotation() needs to be used after the Denorm
122  // transformation to get back to the image coords.
123  // row: if not NULL, then row->baseline(x) is added to the y_origin, unless
124  // segs is not NULL and num_segs > 0, in which case they are used.
125  // rotation: if not NULL, apply this rotation after translation to the
126  // origin and scaling. (Usually a classify rotation.)
127  // predecessor: if not NULL, then predecessor has been applied to the
128  // input space and needs to be undone to complete the inverse.
129  // segs: if not NULL and num_segs > 0, then the segs provide the y_origin
130  // and the y_scale at a given source x.
131  // num_segs: the number of segs.
132  // The above pointers are not owned by this DENORM and are assumed to live
133  // longer than this denorm, except rotation and segs, which are deep
134  // copied on input.
135  //
136  // x_origin: The x origin which will be mapped to final_xshift in the result.
137  // y_origin: The y origin which will be mapped to final_yshift in the result.
138  // Added to result of row->baseline(x) if not NULL.
139  //
140  // x_scale: scale factor for the x-coordinate.
141  // y_scale: scale factor for the y-coordinate. Ignored if segs is given.
142  // Note that these scale factors apply to the same x and y system as the
143  // x-origin and y-origin apply, ie after any block rotation, but before
144  // the rotation argument is applied.
145  //
146  // final_xshift: The x component of the final translation.
147  // final_yshift: The y component of the final translation.
148  //
149  // In theory, any of the commonly used normalizations can be setup here:
150  // * Traditional baseline normalization on a word:
151  // SetupNormalization(block, row, NULL, NULL, NULL, 0,
152  // box.x_middle(), 0.0f,
153  // kBlnXHeight / x_height, kBlnXHeight / x_height,
154  // 0, kBlnBaselineOffset);
155  // * Numeric mode baseline normalization on a word:
156  // SetupNormalization(block, NULL, NULL, NULL, segs, num_segs,
157  // box.x_middle(), 0.0f,
158  // kBlnXHeight / x_height, kBlnXHeight / x_height,
159  // 0, kBlnBaselineOffset);
160  // * Anisotropic character normalization used by IntFx.
161  // SetupNormalization(NULL, NULL, NULL, denorm, NULL, 0,
162  // centroid_x, centroid_y,
163  // 51.2 / ry, 51.2 / rx, 128, 128);
164  // * Normalize blob height to x-height (current OSD):
165  // SetupNormalization(NULL, NULL, &rotation, NULL, NULL, 0,
166  // box.rotational_x_middle(rotation),
167  // box.rotational_y_middle(rotation),
168  // kBlnXHeight / box.rotational_height(rotation),
169  // kBlnXHeight / box.rotational_height(rotation),
170  // 0, kBlnBaselineOffset);
171  // * Secondary normalization for classification rotation (current):
172  // FCOORD rotation = block->classify_rotation();
173  // float target_height = kBlnXHeight / CCStruct::kXHeightCapRatio;
174  // SetupNormalization(NULL, NULL, &rotation, denorm, NULL, 0,
175  // box.rotational_x_middle(rotation),
176  // box.rotational_y_middle(rotation),
177  // target_height / box.rotational_height(rotation),
178  // target_height / box.rotational_height(rotation),
179  // 0, kBlnBaselineOffset);
180  // * Proposed new normalizations for CJK: Between them there is then
181  // no need for further normalization at all, and the character fills the cell.
182  // ** Replacement for baseline normalization on a word:
183  // Scales height and width independently so that modal height and pitch
184  // fill the cell respectively.
185  // float cap_height = x_height / CCStruct::kXHeightCapRatio;
186  // SetupNormalization(block, row, NULL, NULL, NULL, 0,
187  // box.x_middle(), cap_height / 2.0f,
188  // kBlnCellHeight / fixed_pitch,
189  // kBlnCellHeight / cap_height,
190  // 0, 0);
191  // ** Secondary normalization for classification (with rotation) (proposed):
192  // Requires a simple translation to the center of the appropriate character
193  // cell, no further scaling and a simple rotation (or nothing) about the
194  // cell center.
195  // FCOORD rotation = block->classify_rotation();
196  // SetupNormalization(NULL, NULL, &rotation, denorm, NULL, 0,
197  // fixed_pitch_cell_center,
198  // 0.0f,
199  // 1.0f,
200  // 1.0f,
201  // 0, 0);
202  void SetupNormalization(const BLOCK* block,
203  const ROW* row,
204  const FCOORD* rotation,
205  const DENORM* predecessor,
206  const DENORM_SEG* segs, int num_segs,
207  float x_origin, float y_origin,
208  float x_scale, float y_scale,
209  float final_xshift, float final_yshift);
210 
211  // Transforms the given coords one step forward to normalized space, without
212  // using any block rotation or predecessor.
213  void LocalNormTransform(const TPOINT& pt, TPOINT* transformed) const;
214  void LocalNormTransform(const FCOORD& pt, FCOORD* transformed) const;
215  // Transforms the given coords forward to normalized space using the
216  // full transformation sequence defined by the block rotation, the
217  // predecessors, deepest first, and finally this.
218  void NormTransform(const TPOINT& pt, TPOINT* transformed) const;
219  void NormTransform(const FCOORD& pt, FCOORD* transformed) const;
220  // Transforms the given coords one step back to source space, without
221  // using any block rotation or predecessor.
222  void LocalDenormTransform(const TPOINT& pt, TPOINT* original) const;
223  void LocalDenormTransform(const FCOORD& pt, FCOORD* original) const;
224  // Transforms the given coords all the way back to source image space using
225  // the full transformation sequence defined by this and its predecessors
226  // recursively, shallowest first, and finally any block re_rotation.
227  void DenormTransform(const TPOINT& pt, TPOINT* original) const;
228  void DenormTransform(const FCOORD& pt, FCOORD* original) const;
229 
230  // Normalize a blob using blob transformations. Less accurate, but
231  // more accurately copies the old way.
232  void LocalNormBlob(TBLOB* blob) const;
233 
234  // Fills in the x-height range accepted by the given unichar_id, given its
235  // bounding box in the usual baseline-normalized coordinates, with some
236  // initial crude x-height estimate (such as word size) and this denoting the
237  // transformation that was used. Returns false, and an empty range if the
238  // bottom is a mis-fit. Returns true and empty [0, 0] range if the bottom
239  // fits, but the top is impossible.
240  bool XHeightRange(int unichar_id, const UNICHARSET& unicharset,
241  const TBOX& bbox, inT16* min_xht, inT16* max_xht) const;
242 
 // Returns the stored image pointer (see pix_: "best available image").
243  Pix* pix() const {
244  return pix_;
245  }
 // Stores the pointer only; no copy is made here.
246  void set_pix(Pix* pix) {
247  pix_ = pix;
248  }
 // True if the source image is white-on-black (see inverse_).
249  bool inverse() const {
250  return inverse_;
251  }
252  void set_inverse(bool value) {
253  inverse_ = value;
254  }
 // Follows the predecessor_ links recursively and returns the DENORM at the
 // start of the chain, i.e. the one with no predecessor (this, if unchained).
255  const DENORM* RootDenorm() const {
256  if (predecessor_ != NULL)
257  return predecessor_->RootDenorm();
258  return this;
259  }
 // Returns the immediately preceding normalization, or NULL if there is none.
260  const DENORM* predecessor() const {
261  return predecessor_;
262  }
263  // Accessors - perhaps should not be needed.
264  float x_scale() const {
265  return x_scale_;
266  }
267  float y_scale() const {
268  return y_scale_;
269  }
270  const ROW *row() const {
271  return row_;
272  }
 // Takes a non-const ROW* but stores it as const ROW*; the row is not owned.
273  void set_row(ROW* row) {
274  row_ = row;
275  }
276  const BLOCK* block() const {
277  return block_;
278  }
279  void set_block(const BLOCK* block) {
280  block_ = block;
281  }
282 
283  private:
284  // Free allocated memory and clear pointers.
285  void Clear();
286  // Setup default values.
287  void Init();
288 
289  // Returns the y-origin at the original (un-normalized) x.
290  float YOriginAtOrigX(float orig_x) const;
291 
292  // Returns the y-scale at the original (un-normalized) x.
293  float YScaleAtOrigX(float orig_x) const;
294 
295  // Deep copy the array of segments for use as a y_origin and y_scale.
296  void SetSegments(const DENORM_SEG* new_segs, int seg_count);
297 
298  // Finds the appropriate segment for a given original x-coord.
299  const DENORM_SEG* BinarySearchSegment(float orig_x) const;
300 
301  // Best available image.
302  Pix* pix_;
303  // True if the source image is white-on-black.
304  bool inverse_;
305  // Block the word came from. If not null, block->re_rotation() takes the
306  // "untransformed" coordinates even further back to the original image.
307  const BLOCK* block_;
308  // Row the word came from. If not null, row->baseline() is added to y_origin_.
309  const ROW* row_;
310  // Rotation to apply after translation to the origin and scaling, and before the final translation. Deep copied on input per the SetupNormalization contract.
311  const FCOORD* rotation_;
312  // Previous transformation in a chain. Not owned.
313  const DENORM* predecessor_;
314  // Array of segments used to specify local y_origin_ and y_scale_.
315  // Owned by the DENORM.
316  DENORM_SEG *segs_;
317  // Size of the segs_ array.
318  int num_segs_;
319  // x-coordinate to be mapped to final_xshift_ in the result.
320  float x_origin_;
321  // y-coordinate to be mapped to final_yshift_ in the result.
322  float y_origin_;
323  // Scale factors for x and y coords. Applied to pre-rotation system.
324  float x_scale_;
325  float y_scale_;
326  // Destination coords of the x_origin_ and y_origin_.
327  float final_xshift_;
328  float final_yshift_;
329 };
330 #endif