20 #ifndef TESSERACT_CCUTIL_TESSDATAMANAGER_H_
21 #define TESSERACT_CCUTIL_TESSDATAMANAGER_H_
// Suffix string "traineddata". Declared static, so each translation unit that
// includes this header gets its own private copy of the array.
static const char kTrainedDataSuffix[] = "traineddata";
// Suffix strings, one per kind of tessdata component file. Many of them are
// gathered by the kTessdataFileSuffixes table further down in this header.
// NOTE(review): the visible table entries follow the same order as the
// definitions below -- confirm whether anything relies on that before
// reordering or inserting a new suffix.
static const char kLangConfigFileSuffix[] = "config";
static const char kUnicharsetFileSuffix[] = "unicharset";
static const char kAmbigsFileSuffix[] = "unicharambigs";
static const char kBuiltInTemplatesFileSuffix[] = "inttemp";
static const char kBuiltInCutoffsFileSuffix[] = "pffmtable";
static const char kNormProtoFileSuffix[] = "normproto";
static const char kPuncDawgFileSuffix[] = "punc-dawg";
static const char kSystemDawgFileSuffix[] = "word-dawg";
static const char kNumberDawgFileSuffix[] = "number-dawg";
static const char kFreqDawgFileSuffix[] = "freq-dawg";
static const char kFixedLengthDawgsFileSuffix[] = "fixed-length-dawgs";
static const char kCubeUnicharsetFileSuffix[] = "cube-unicharset";
static const char kCubeSystemDawgFileSuffix[] = "cube-word-dawg";
static const char kShapeTableFileSuffix[] = "shapetable";
static const char kBigramDawgFileSuffix[] = "bigram-dawg";
static const char kUnambigDawgFileSuffix[] = "unambig-dawg";
static const char kParamsTrainingModelFileSuffix[] = "params-training-model";
77 static const char *
const kTessdataFileSuffixes[] = {
78 kLangConfigFileSuffix,
79 kUnicharsetFileSuffix,
81 kBuiltInTemplatesFileSuffix,
82 kBuiltInCutoffsFileSuffix,
85 kSystemDawgFileSuffix,
86 kNumberDawgFileSuffix,
88 kFixedLengthDawgsFileSuffix,
89 kCubeUnicharsetFileSuffix,
90 kCubeSystemDawgFileSuffix,
91 kShapeTableFileSuffix,
92 kBigramDawgFileSuffix,
93 kUnambigDawgFileSuffix,
94 kParamsTrainingModelFileSuffix,
101 static const bool kTessdataFileIsText[] = {
// Named limit of 1000 on the number of tessdata entries.
// NOTE(review): the code that enforces this limit is not visible in this part
// of the header -- check the callers before changing the value.
static const int kMaxNumTessdataEntries = 1000;
135 actual_tessdata_num_entries_ = 0;
137 offset_table_[i] = -1;
// Declaration only; the definition is not part of this header section.
//   data_file_name - C string naming the data file (read-only to the callee).
//   debug_level    - integer debug setting.
// Returns a bool. NOTE(review): presumably true on success -- confirm against
// the definition.
bool Init(const char *data_file_name, int debug_level);
159 tprintf(
"TessdataManager: seek to offset %lld - start of tessdata"
160 "type %d (%s))\n", offset_table_[tessdata_type],
161 tessdata_type, kTessdataFileSuffixes[tessdata_type]);
163 if (offset_table_[tessdata_type] < 0) {
167 static_cast<size_t>(offset_table_[tessdata_type]),
174 int index = tessdata_type + 1;
175 while (index < actual_tessdata_num_entries_ && offset_table_[index] == -1) {
179 tprintf(
"TessdataManager: end offset for type %d is %lld\n",
181 (index == actual_tessdata_num_entries_) ? -1
182 : offset_table_[index]);
184 return (index == actual_tessdata_num_entries_) ? -1 : offset_table_[index] - 1;
188 if (data_file_ !=
NULL) {
206 const char *output_filename);
214 char **component_filenames,
215 int num_new_components);
234 static void CopyFile(FILE *input_file, FILE *output_file,
235 bool newline_end,
inT64 num_bytes_to_copy);
// Declaration only; the definition is not part of this header section.
//   language_data_path_prefix - C string path prefix.
//   file_suffix               - C string suffix. NOTE(review): presumably one
//                               of the k...FileSuffix constants; confirm.
//   text_file                 - NOTE(review): presumably selects text rather
//                               than binary mode; confirm.
// Returns a FILE pointer. NOTE(review): failure behaviour (NULL return or
// otherwise) and who closes the stream are not visible here.
static FILE *GetFilePtr(const char *language_data_path_prefix,
                        const char *file_suffix, bool text_file);
279 inT32 actual_tessdata_num_entries_;
289 #endif // TESSERACT_CCUTIL_TESSDATAMANAGER_H_