Skip to content

Commit c702b48

Browse files
committed
Fix duplicate ALTO XML IDs when multiple pages are present
1 parent 3157ff0 commit c702b48

File tree

1 file changed

+6
-6
lines changed

1 file changed

+6
-6
lines changed

src/api/altorenderer.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ char *TessBaseAPI::GetAltoText(ETEXT_DESC *monitor, int page_number) {
168168
case PT_PULLOUT_IMAGE: {
169169
// Handle all kinds of images.
170170
// TODO: optionally add TYPE, for example TYPE="photo".
171-
alto_str << "\t\t\t\t<Illustration ID=\"cblock_" << bcnt++ << "\"";
171+
alto_str << "\t\t\t\t<Illustration ID=\"cblock_" << page_number << "_" << bcnt++ << "\"";
172172
AddBoxToAlto(res_it.get(), RIL_BLOCK, alto_str);
173173
alto_str << "</Illustration>\n";
174174
res_it->Next(RIL_BLOCK);
@@ -177,7 +177,7 @@ char *TessBaseAPI::GetAltoText(ETEXT_DESC *monitor, int page_number) {
177177
case PT_HORZ_LINE:
178178
case PT_VERT_LINE:
179179
// Handle horizontal and vertical lines.
180-
alto_str << "\t\t\t\t<GraphicalElement ID=\"cblock_" << bcnt++ << "\"";
180+
alto_str << "\t\t\t\t<GraphicalElement ID=\"cblock_" << page_number << "_" << bcnt++ << "\"";
181181
AddBoxToAlto(res_it.get(), RIL_BLOCK, alto_str);
182182
alto_str << "</GraphicalElement >\n";
183183
res_it->Next(RIL_BLOCK);
@@ -190,24 +190,24 @@ char *TessBaseAPI::GetAltoText(ETEXT_DESC *monitor, int page_number) {
190190
}
191191

192192
if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
193-
alto_str << "\t\t\t\t<ComposedBlock ID=\"cblock_" << bcnt << "\"";
193+
alto_str << "\t\t\t\t<ComposedBlock ID=\"cblock_" << page_number << "_" << bcnt << "\"";
194194
AddBoxToAlto(res_it.get(), RIL_BLOCK, alto_str);
195195
alto_str << "\n";
196196
}
197197

198198
if (res_it->IsAtBeginningOf(RIL_PARA)) {
199-
alto_str << "\t\t\t\t\t<TextBlock ID=\"block_" << tcnt << "\"";
199+
alto_str << "\t\t\t\t\t<TextBlock ID=\"block_" << page_number << "_" << tcnt << "\"";
200200
AddBoxToAlto(res_it.get(), RIL_PARA, alto_str);
201201
alto_str << "\n";
202202
}
203203

204204
if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
205-
alto_str << "\t\t\t\t\t\t<TextLine ID=\"line_" << lcnt << "\"";
205+
alto_str << "\t\t\t\t\t\t<TextLine ID=\"line_" << page_number << "_" << lcnt << "\"";
206206
AddBoxToAlto(res_it.get(), RIL_TEXTLINE, alto_str);
207207
alto_str << "\n";
208208
}
209209

210-
alto_str << "\t\t\t\t\t\t\t<String ID=\"string_" << wcnt << "\"";
210+
alto_str << "\t\t\t\t\t\t\t<String ID=\"string_" << page_number << "_" << wcnt << "\"";
211211
AddBoxToAlto(res_it.get(), RIL_WORD, alto_str);
212212
alto_str << " CONTENT=\"";
213213

0 commit comments

Comments
 (0)