/*************************************************************************** * Copyright (C) 2011 by Pierre Marchand * * pierre@oep-h.com * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * *************************************************************************/ #include "ocr.h" #include "composer.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include "tesseract/baseapi.h" #include "tesseract/resultiterator.h" #include "tesseract/pageres.h" #include "tesseract/werd.h" QImage dbgImage0; int dbgW; int dbgH; double dbgScale; OCR::OCR(const QString& datadir) :tesseract::TessBaseAPI() { scaleGlyphs = true; // SetVariable("stopper_debug_level", "10"); // SetVariable("classify_learning_debug_level", "10"); // SetVariable("dawg_debug_level", "10"); // SetVariable("global_ambigs_debug_level", ""); // SetVariable("global_tessdata_manager_debug_level", "10"); // SetVariable("hyphen_debug_level", "10"); // SetVariable("stopper_debug_level", "10"); // SetVariable("tosp_debug_level", "10"); setlocale(LC_NUMERIC, "C"); QFileInfo fi(datadir); if(fi.exists()) { QString dir(fi.dir().absolutePath()); QString lang(fi.fileName().left(3)); qDebug()<<"Init Tesseract:"<" < ginfo; ROW * block(0); unsigned int line(0); while (page_res_it.word() != NULL) { WERD_RES *word = page_res_it.word(); ROW_RES *row = page_res_it.row(); if (word_count) { GInfo gfn; gfn.line = line; ginfo << gfn; ++letterCount; } if(row->row != block) { ++line; block = row->row; } int wlen = strlen(word->best_choice->unichar_lengths().string()); C_BLOB_LIST *blobs = word->word->cblob_list(); C_BLOB_IT it(blobs); it.move_to_first(); // for(int i(0); i < wordString.count(); ++i) // for(it.mark_cycle_pt();!it.cycled_list(); it.forward()) for(int i = 0; i < wlen; i++) { C_BLOB *blob = it.data(); TBOX current = blob->bounding_box(); // TBOX current = word->bln_boxes->BlobBox(i); int cleft(current.left()); int cright(current.width()); GInfo gfn; gfn.xheight = row->row->x_height(); gfn.ascender = row->row->ascenders(); gfn.descender = row->row->descenders(); gfn.baseline = m_img.height() - row->row->base_line(cleft + ( cright / 2)); gfn.line = line; ginfo << gfn; // gfn.dump(); it.forward(); ++letterCount; } page_res_it.forward(); ++word_count; } /// // PAGE_RES_IT page_res_it(page_res); // int word_count = 0; // while (page_res_it.word() != NULL) // { // WERD_RES *word = page_res_it.word(); // const char *str = word->best_choice->unichar_string().string(); // const char *len = word->best_choice->unichar_lengths().string(); // TBOX real_rect = word->word->bounding_box(); // if (word_count) // add_space(out); // int n = strlen(len); // for (int i = 0; i < n; i++) // { // TESS_CHAR *tc = new TESS_CHAR(rating_to_cost(word->best_choice->rating()), // str, *len); // tc->box = real_rect.intersection(word->box_word->BlobBox(i)); // out->add_after_then_move(tc); // str += *len; // len++; // } // page_res_it.forward(); // word_count++; // } //// qDebug()<<"RESULT:"< costDict; QMap baselineDict; QMap descDict; QString tc(QString::fromUtf8(text)); int tcCount = tc.count(); // CS< 0) Composer::Add(c, QPointF(y0[i - 1], iHeight - x1[i - 1]), ginfo.at(i - 1).xheight, ginfo.at(i).line); } else Composer::Add(c, QPointF(x0[i], iHeight - x1[i]), ginfo.at(i).xheight, ginfo.at(i).line); CS<= 255) break; QChar c2(tc.at(j)); if(c == c2) { QPoint tl2(x0[j], iHeight - y1[j]); QPoint br2(y0[j], iHeight - x1[j]); QRect r2(tl2, br2); #ifdef DBG_EXPORT_BOXES painter.drawRect(r2); painter.setPen(bpen); painter.drawLine(QPoint(r2.left(), ginfo.at(j).baseline), QPoint(r2.right(), ginfo.at(j).baseline)); painter.setPen(pen); #endif if(double(r2.height()) < mhs_max && double(r2.height()) > mhs_min) { if(r2.width() > r.width()) { r.setWidth(qMax(r.width(), r2.width())); // r.setLeft(r.left() - ((r2.width()- r.width())/2)); } if(r2.height() > r.height()) { r.setHeight(qMax(r.height(), r2.height())); // r.setTop(r.top() - ((r2.top() - r2.top())/2)); } } ++iCount; } } #ifdef DBG_EXPORT_BOXES painter.end(); dbgImage.save(QString("%1_BOXES.png").arg(c)); #endif qDebug()<<"Processing character:"< xheights; QList baselines; for(int j(i), nSamples(0); j < result && nSamples < 255; ++j) { QChar c2(tc.at(j)); if(c == c2) { ++nSamples; QPoint tl2(x0[j], iHeight - y1[j]); QPoint br2(y0[j], iHeight - x1[j]); QRect r2(tl2, br2); if(double(r2.height()) < mhs_max && double(r2.height()) > mhs_min) { double vScale(double(r2.height()) / double(r.height())); if(c == QChar('a')) qDebug()<<"max:"<