Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
pitsync1.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: pitsync1.cpp (Formerly pitsync.c)
3  * Description: Code to find the optimum fixed pitch segmentation of some blobs.
4  * Author: Ray Smith
5  * Created: Thu Nov 19 11:48:05 GMT 1992
6  *
7  * (C) Copyright 1992, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #include "mfcpch.h"
21 #ifdef __UNIX__
22 #include <assert.h>
23 #endif
24 #include <math.h>
25 #include "memry.h"
26 #include "pitsync1.h"
27 
28 #include "notdll.h"
29 
30 ELISTIZE (FPSEGPT) CLISTIZE (FPSEGPT_LIST)
31 #define EXTERN
32 EXTERN
33 INT_VAR (pitsync_linear_version, 6, "Use new fast algorithm");
34 EXTERN
36 "Dist inside big blob for chopping");
37 EXTERN
39 "Fraction of cut for free cuts");
40 EXTERN
41 INT_VAR (pitsync_fake_depth, 1, "Max advance fake generation");
42 
43 /**********************************************************************
44  * FPSEGPT::FPSEGPT
45  *
46  * Constructor to make a new FPSEGPT.
47  * The existing FPCUTPT is duplicated.
48  **********************************************************************/
49 
50 FPSEGPT::FPSEGPT( //constructor
51  FPCUTPT *cutpt //create from new form
52  ) {
53  pred = NULL;
54  mean_sum = cutpt->sum ();
55  sq_sum = cutpt->squares ();
56  cost = cutpt->cost_function ();
57  faked = cutpt->faked;
58  terminal = cutpt->terminal;
59  fake_count = cutpt->fake_count;
60  xpos = cutpt->position ();
61  mid_cuts = cutpt->cheap_cuts ();
62 }
63 
64 
65 /**********************************************************************
66  * FPSEGPT::FPSEGPT
67  *
68  * Constructor to make a new FPSEGPT.
69  **********************************************************************/
70 
71 FPSEGPT::FPSEGPT ( //constructor
72 inT16 x //position
73 ):xpos (x) {
74  pred = NULL;
75  mean_sum = 0;
76  sq_sum = 0;
77  cost = 0;
78  faked = FALSE;
79  terminal = FALSE;
80  fake_count = 0;
81  mid_cuts = 0;
82 }
83 
84 
85 /**********************************************************************
86  * FPSEGPT::FPSEGPT
87  *
88  * Constructor to make a new FPSEGPT.
89  **********************************************************************/
90 
91 FPSEGPT::FPSEGPT ( //constructor
92 inT16 x, //position
93 BOOL8 faking, //faking this one
94 inT16 offset, //dist to gap
95 inT16 region_index, //segment number
96 inT16 pitch, //proposed pitch
97 inT16 pitch_error, //allowed tolerance
98 FPSEGPT_LIST * prev_list //previous segment
99 ):xpos (x) {
100  inT16 best_fake; //on previous
101  FPSEGPT *segpt; //segment point
102  inT32 dist; //from prev segment
103  double sq_dist; //squared distance
104  double mean; //mean pitch
105  double total; //total dists
106  double factor; //cost function
107  FPSEGPT_IT pred_it = prev_list;//for previuos segment
108 
109  cost = MAX_FLOAT32;
110  pred = NULL;
111  faked = faking;
112  terminal = FALSE;
113  best_fake = MAX_INT16;
114  mid_cuts = 0;
115  for (pred_it.mark_cycle_pt (); !pred_it.cycled_list (); pred_it.forward ()) {
116  segpt = pred_it.data ();
117  if (segpt->fake_count < best_fake)
118  best_fake = segpt->fake_count;
119  dist = x - segpt->xpos;
120  if (dist >= pitch - pitch_error && dist <= pitch + pitch_error
121  && !segpt->terminal) {
122  total = segpt->mean_sum + dist;
123  sq_dist = dist * dist + segpt->sq_sum + offset * offset;
124  //sum of squarees
125  mean = total / region_index;
126  factor = mean - pitch;
127  factor *= factor;
128  factor += sq_dist / (region_index) - mean * mean;
129  if (factor < cost) {
130  cost = factor; //find least cost
131  pred = segpt; //save path
132  mean_sum = total;
133  sq_sum = sq_dist;
134  fake_count = segpt->fake_count + faked;
135  }
136  }
137  }
138  if (fake_count > best_fake + 1)
139  pred = NULL; //fail it
140 }
141 
142 
143 /**********************************************************************
144  * check_pitch_sync
145  *
146  * Construct the lattice of possible segmentation points and choose the
147  * optimal path. Return the optimal path only.
148  * The return value is a measure of goodness of the sync.
149  **********************************************************************/
150 
151 double check_pitch_sync( //find segmentation
152  BLOBNBOX_IT *blob_it, //blobs to do
153  inT16 blob_count, //no of blobs
154  inT16 pitch, //pitch estimate
155  inT16 pitch_error, //tolerance
156  STATS *projection, //vertical
157  FPSEGPT_LIST *seg_list //output list
158  ) {
159  inT16 x; //current coord
160  inT16 min_index; //blob number
161  inT16 max_index; //blob number
162  inT16 left_edge; //of word
163  inT16 right_edge; //of word
164  inT16 right_max; //max allowed x
165  inT16 min_x; //in this region
166  inT16 max_x;
167  inT16 region_index;
168  inT16 best_region_index = 0; //for best result
169  inT16 offset; //dist to legal area
170  inT16 left_best_x; //edge of good region
171  inT16 right_best_x; //right edge
172  TBOX min_box; //bounding box
173  TBOX max_box; //bounding box
174  TBOX next_box; //box of next blob
175  FPSEGPT *segpt; //segment point
176  FPSEGPT_LIST *segpts; //points in a segment
177  double best_cost; //best path
178  double mean_sum; //computes result
179  FPSEGPT *best_end; //end of best path
180  BLOBNBOX_IT min_it; //copy iterator
181  BLOBNBOX_IT max_it; //copy iterator
182  FPSEGPT_IT segpt_it; //iterator
183  //output segments
184  FPSEGPT_IT outseg_it = seg_list;
185  FPSEGPT_LIST_CLIST lattice; //list of lists
186  //region iterator
187  FPSEGPT_LIST_C_IT lattice_it = &lattice;
188 
189  // tprintf("Computing sync on word of %d blobs with pitch %d\n",
190  // blob_count, pitch);
191  // if (blob_count==8 && pitch==27)
192  // projection->print(stdout,TRUE);
193  if (pitch < 3)
194  pitch = 3; //nothing ludicrous
195  if ((pitch - 3) / 2 < pitch_error)
196  pitch_error = (pitch - 3) / 2;
197  min_it = *blob_it;
198  min_box = box_next (&min_it); //get box
199  // if (blob_count==8 && pitch==27)
200  // tprintf("1st box at (%d,%d)->(%d,%d)\n",
201  // min_box.left(),min_box.bottom(),
202  // min_box.right(),min_box.top());
203  //left of word
204  left_edge = min_box.left () + pitch_error;
205  for (min_index = 1; min_index < blob_count; min_index++) {
206  min_box = box_next (&min_it);
207  // if (blob_count==8 && pitch==27)
208  // tprintf("Box at (%d,%d)->(%d,%d)\n",
209  // min_box.left(),min_box.bottom(),
210  // min_box.right(),min_box.top());
211  }
212  right_edge = min_box.right (); //end of word
213  max_x = left_edge;
214  //min permissible
215  min_x = max_x - pitch + pitch_error * 2 + 1;
216  right_max = right_edge + pitch - pitch_error - 1;
217  segpts = new FPSEGPT_LIST; //list of points
218  segpt_it.set_to_list (segpts);
219  for (x = min_x; x <= max_x; x++) {
220  segpt = new FPSEGPT (x); //make a new one
221  //put in list
222  segpt_it.add_after_then_move (segpt);
223  }
224  //first segment
225  lattice_it.add_before_then_move (segpts);
226  min_index = 0;
227  region_index = 1;
228  best_cost = MAX_FLOAT32;
229  best_end = NULL;
230  min_it = *blob_it;
231  min_box = box_next (&min_it); //first box
232  do {
233  left_best_x = -1;
234  right_best_x = -1;
235  segpts = new FPSEGPT_LIST; //list of points
236  segpt_it.set_to_list (segpts);
237  min_x += pitch - pitch_error;//next limits
238  max_x += pitch + pitch_error;
239  while (min_box.right () < min_x && min_index < blob_count) {
240  min_index++;
241  min_box = box_next (&min_it);
242  }
243  max_it = min_it;
244  max_index = min_index;
245  max_box = min_box;
246  next_box = box_next (&max_it);
247  for (x = min_x; x <= max_x && x <= right_max; x++) {
248  while (x < right_edge && max_index < blob_count
249  && x > max_box.right ()) {
250  max_index++;
251  max_box = next_box;
252  next_box = box_next (&max_it);
253  }
254  if (x <= max_box.left () + pitch_error
255  || x >= max_box.right () - pitch_error || x >= right_edge
256  || (max_index < blob_count - 1 && x >= next_box.left ())
257  || (x - max_box.left () > pitch * pitsync_joined_edge
258  && max_box.right () - x > pitch * pitsync_joined_edge)) {
259  // || projection->local_min(x))
260  if (x - max_box.left () > 0
261  && x - max_box.left () <= pitch_error)
262  //dist to real break
263  offset = x - max_box.left ();
264  else if (max_box.right () - x > 0
265  && max_box.right () - x <= pitch_error
266  && (max_index >= blob_count - 1
267  || x < next_box.left ()))
268  offset = max_box.right () - x;
269  else
270  offset = 0;
271  // offset=pitsync_offset_freecut_fraction*projection->pile_count(x);
272  segpt = new FPSEGPT (x, FALSE, offset, region_index,
273  pitch, pitch_error, lattice_it.data ());
274  }
275  else {
276  offset = projection->pile_count (x);
277  segpt = new FPSEGPT (x, TRUE, offset, region_index,
278  pitch, pitch_error, lattice_it.data ());
279  }
280  if (segpt->previous () != NULL) {
281  segpt_it.add_after_then_move (segpt);
282  if (x >= right_edge - pitch_error) {
283  segpt->terminal = TRUE;//no more wanted
284  if (segpt->cost_function () < best_cost) {
285  best_cost = segpt->cost_function ();
286  //find least
287  best_end = segpt;
288  best_region_index = region_index;
289  left_best_x = x;
290  right_best_x = x;
291  }
292  else if (segpt->cost_function () == best_cost
293  && right_best_x == x - 1)
294  right_best_x = x;
295  }
296  }
297  else {
298  delete segpt; //no good
299  }
300  }
301  if (segpts->empty ()) {
302  if (best_end != NULL)
303  break; //already found one
304  make_illegal_segment (lattice_it.data (), min_box, min_it,
305  region_index, pitch, pitch_error, segpts);
306  }
307  else {
308  if (right_best_x > left_best_x + 1) {
309  left_best_x = (left_best_x + right_best_x + 1) / 2;
310  for (segpt_it.mark_cycle_pt (); !segpt_it.cycled_list ()
311  && segpt_it.data ()->position () != left_best_x;
312  segpt_it.forward ());
313  if (segpt_it.data ()->position () == left_best_x)
314  //middle of region
315  best_end = segpt_it.data ();
316  }
317  }
318  //new segment
319  lattice_it.add_before_then_move (segpts);
320  region_index++;
321  }
322  while (min_x < right_edge);
323  ASSERT_HOST (best_end != NULL);//must always find some
324 
325  for (lattice_it.mark_cycle_pt (); !lattice_it.cycled_list ();
326  lattice_it.forward ()) {
327  segpts = lattice_it.data ();
328  segpt_it.set_to_list (segpts);
329  // if (blob_count==8 && pitch==27)
330  // {
331  // for (segpt_it.mark_cycle_pt();!segpt_it.cycled_list();segpt_it.forward())
332  // {
333  // segpt=segpt_it.data();
334  // tprintf("At %d, (%x) cost=%g, m=%g, sq=%g, pred=%x\n",
335  // segpt->position(),segpt,segpt->cost_function(),
336  // segpt->sum(),segpt->squares(),segpt->previous());
337  // }
338  // tprintf("\n");
339  // }
340  for (segpt_it.mark_cycle_pt (); !segpt_it.cycled_list ()
341  && segpt_it.data () != best_end; segpt_it.forward ());
342  if (segpt_it.data () == best_end) {
343  //save good one
344  segpt = segpt_it.extract ();
345  outseg_it.add_before_then_move (segpt);
346  best_end = segpt->previous ();
347  }
348  }
349  ASSERT_HOST (best_end == NULL);
350  ASSERT_HOST (!outseg_it.empty ());
351  outseg_it.move_to_last ();
352  mean_sum = outseg_it.data ()->sum ();
353  mean_sum = mean_sum * mean_sum / best_region_index;
354  if (outseg_it.data ()->squares () - mean_sum < 0)
355  tprintf ("Impossible sqsum=%g, mean=%g, total=%d\n",
356  outseg_it.data ()->squares (), outseg_it.data ()->sum (),
357  best_region_index);
358  lattice.deep_clear (); //shift the lot
359  return outseg_it.data ()->squares () - mean_sum;
360 }
361 
362 
363 /**********************************************************************
364  * make_illegal_segment
365  *
366  * Make a fake set of chop points due to having no legal places.
367  **********************************************************************/
368 
369 void make_illegal_segment( //find segmentation
370  FPSEGPT_LIST *prev_list, //previous segments
371  TBOX blob_box, //bounding box
372  BLOBNBOX_IT blob_it, //iterator
373  inT16 region_index, //number of segment
374  inT16 pitch, //pitch estimate
375  inT16 pitch_error, //tolerance
376  FPSEGPT_LIST *seg_list //output list
377  ) {
378  inT16 x; //current coord
379  inT16 min_x = 0; //in this region
380  inT16 max_x = 0;
381  inT16 offset; //dist to edge
382  FPSEGPT *segpt; //segment point
383  FPSEGPT *prevpt; //previous point
384  float best_cost; //best path
385  FPSEGPT_IT segpt_it = seg_list;//iterator
386  //previous points
387  FPSEGPT_IT prevpt_it = prev_list;
388 
389  best_cost = MAX_FLOAT32;
390  for (prevpt_it.mark_cycle_pt (); !prevpt_it.cycled_list ();
391  prevpt_it.forward ()) {
392  prevpt = prevpt_it.data ();
393  if (prevpt->cost_function () < best_cost) {
394  //find least
395  best_cost = prevpt->cost_function ();
396  min_x = prevpt->position ();
397  max_x = min_x; //limits on coords
398  }
399  else if (prevpt->cost_function () == best_cost) {
400  max_x = prevpt->position ();
401  }
402  }
403  min_x += pitch - pitch_error;
404  max_x += pitch + pitch_error;
405  for (x = min_x; x <= max_x; x++) {
406  while (x > blob_box.right ()) {
407  blob_box = box_next (&blob_it);
408  }
409  offset = x - blob_box.left ();
410  if (blob_box.right () - x < offset)
411  offset = blob_box.right () - x;
412  segpt = new FPSEGPT (x, FALSE, offset,
413  region_index, pitch, pitch_error, prev_list);
414  if (segpt->previous () != NULL) {
415  ASSERT_HOST (offset >= 0);
416  fprintf (stderr, "made fake at %d\n", x);
417  //make one up
418  segpt_it.add_after_then_move (segpt);
419  segpt->faked = TRUE;
420  segpt->fake_count++;
421  }
422  else
423  delete segpt;
424  }
425 }