fonzie
clone your own copy | download snapshot

Snapshots | iceberg

No images in this repository’s iceberg at this time

Inside this repository

ocr.cpp
text/x-c++src

Download raw (13.0 KB)

/***************************************************************************
 *   Copyright (C) 2011 by Pierre Marchand   *
 *   pierre@oep-h.com   *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
 *************************************************************************/

#include "ocr.h"
#include "composer.h"
#include <QDebug>
#include <QRect>
#include <QPoint>
#include <QList>
#include <QSize>
#include <QColor>
#include <QDir>
#include <QFileInfo>
#include <QByteArray>
#include <QPainter>
#include <QPen>

#include <cmath>
#include <locale.h>

#include "tesseract/baseapi.h"
#include "tesseract/resultiterator.h"

#include "tesseract/pageres.h"
#include "tesseract/werd.h"


QImage dbgImage0;
int dbgW;
int dbgH;
double dbgScale;

OCR::OCR(const QString& datadir)
	:tesseract::TessBaseAPI()
{
	scaleGlyphs = true;
	//	SetVariable("stopper_debug_level", "10");
	//	SetVariable("classify_learning_debug_level", "10");
	//	SetVariable("dawg_debug_level", "10");
	//	SetVariable("global_ambigs_debug_level", "");
	//	SetVariable("global_tessdata_manager_debug_level", "10");
	//	SetVariable("hyphen_debug_level", "10");
	//	SetVariable("stopper_debug_level", "10");
	//	SetVariable("tosp_debug_level", "10");

	setlocale(LC_NUMERIC, "C");
	QFileInfo fi(datadir);
	if(fi.exists())
	{
		QString dir(fi.dir().absolutePath());
		QString lang(fi.fileName().left(3));
		qDebug()<<"Init Tesseract:"<<dir<<lang;

		QByteArray bad(dir.toUtf8());
		QByteArray bal(lang.toUtf8());

		int error = Init(bad.constData(), bal.constData());
		CS << "=>" <<error;
	}
	else
	{
		int error = Init(NULL, NULL);
		CS <<"Init Tesseract without datadir specified, it might work if TESSDATA_PREFIX is set and you want to recognize english. Returned"<<error;
	}
}


void OCR::loadImage(const QImage& img)
{
#if QT_VERSION < 0x040700
	SetImage(img.bits(), img.width(), img.height(), img.depth() /8, img.bytesPerLine());
#else
	SetImage(img.constBits(), img.width(), img.height(), img.depth() /8, img.bytesPerLine());
#endif
	m_img = img;
	dbgW = qMin(800, m_img.width());
	dbgH = m_img.height() * dbgW / m_img.width();
	dbgScale = double(dbgW) / double(m_img.width());
	dbgImage0 = m_img.scaled(dbgW, dbgH).convertToFormat(QImage::Format_ARGB32);
}


void OCR::saveImages(int baseline)
{
	int bl(baseline == 0 ? m_img.height() : baseline );
	char * resultText = GetUTF8Text();
	QString exportString(QString::fromUtf8(resultText));
	CS << exportString;
//	qDebug()<<"OCR:";
//	qDebug()<<exportString;
//	Recognize(0);
	char * text;
	int * lengths;
	float * costs;
	int *x0, *y0, *x1, *y1;
	const int result = TesseractExtractResult(&text, &lengths, &costs, &x0, &x1, &y0, &y1, page_res_);

	// This is not enough results, lets try to get xheight and baseline from the deepest of Tesseract :)
	PAGE_RES_IT page_res_it(page_res_);
	int word_count = 0;
	int letterCount(0);
	QList<GInfo> ginfo;
	ROW * block(0);
	unsigned int line(0);
	while (page_res_it.word() != NULL)
	{
		WERD_RES *word = page_res_it.word();
		ROW_RES *row = page_res_it.row();

		if (word_count)
		{
			GInfo gfn;
			gfn.line = line;
			ginfo << gfn;
			++letterCount;
		}

		if(row->row != block)
		{
			++line;
			block = row->row;
		}
		int wlen = strlen(word->best_choice->unichar_lengths().string());
		C_BLOB_LIST *blobs = word->word->cblob_list();
		C_BLOB_IT it(blobs);
		it.move_to_first();
//		for(int i(0); i < wordString.count(); ++i)
//		for(it.mark_cycle_pt();!it.cycled_list(); it.forward())
		for(int i = 0; i < wlen; i++)
		{
			C_BLOB *blob = it.data();
			TBOX current = blob->bounding_box();
//			TBOX current = word->bln_boxes->BlobBox(i);
			int cleft(current.left());
			int cright(current.width());
			GInfo gfn;
			gfn.xheight = row->row->x_height();
			gfn.ascender = row->row->ascenders();
			gfn.descender = row->row->descenders();
			gfn.baseline = m_img.height() - row->row->base_line(cleft + ( cright / 2));
			gfn.line = line;
			ginfo << gfn;
			//			gfn.dump();
			it.forward();
			++letterCount;
		}
		page_res_it.forward();
		++word_count;
	}
	///
//	PAGE_RES_IT page_res_it(page_res);
//	int word_count = 0;
//	while (page_res_it.word() != NULL)
//	{
//		WERD_RES *word = page_res_it.word();
//		const char *str = word->best_choice->unichar_string().string();
//		const char *len = word->best_choice->unichar_lengths().string();
//		TBOX real_rect = word->word->bounding_box();

//		if (word_count)
//			add_space(out);
//		int n = strlen(len);
//		for (int i = 0; i < n; i++)
//		{
//			TESS_CHAR *tc = new TESS_CHAR(rating_to_cost(word->best_choice->rating()),
//						      str, *len);
//			tc->box = real_rect.intersection(word->box_word->BlobBox(i));
//			out->add_after_then_move(tc);
//			str += *len;
//			len++;
//		}
//		page_res_it.forward();
//		word_count++;
//	}
	////

	qDebug()<<"RESULT:"<<result;
	int iHeight(m_img.height());
	int tLen(0);
	for(int i(0); i<result;++i)
		tLen += lengths[i];
	QMap<QChar, double> costDict;
	QMap<int, int> baselineDict;
	QMap<int, int> descDict;
	QString tc(QString::fromUtf8(text));
        int tcCount = tc.count();
       // CS<<tc.count()<<letterCount;

        Q_ASSERT(result == letterCount);

	bool firstDbg(false);
	for(int i(0); i < result; ++i)
	{
		QChar c(tc.at(i));

		// Pass the whole thing to the composer
		// space characters are not "boxed", we invent something here based on the previous char
//		CS << "\n"<< i << c << x0[i] << x1[i] << y0[i] << y1[i] << "\n";
		if(c.isSpace())
		{
			if(i > 0)
				Composer::Add(c, QPointF(y0[i - 1], iHeight - x1[i - 1]), ginfo.at(i - 1).xheight, ginfo.at(i).line);
		}
		else
			Composer::Add(c, QPointF(x0[i], iHeight - x1[i]), ginfo.at(i).xheight, ginfo.at(i).line);
		CS<<c<<ginfo.at(i).line;

		if(!iDict.contains(c))
		{
			if(!c.isPrint() || c.isSpace())
				continue;
			QPoint tl(x0[i], iHeight - y1[i]);
			QPoint br(y0[i],  /*descDict.value(i)*/ iHeight - x1[i]);
			QRect r/*(tl, br)*/;
			// Search the larger (but not too much larger) rect enclosing the same letter
			int iCount(1);

			// First loop to avoid extreme sizes
			double mws(0);
			double mhs(0);
			double asmples(0);
			for(int j(i), nSamples(0); j < result && nSamples < 255; ++j)
			{
				QChar c2(tc.at(j));
				if(c == c2)
				{
					++nSamples;
					QPoint tl2(x0[j], iHeight - y1[j]);
					QPoint br2(y0[j],  iHeight - x1[j]);
					QRect r2(tl2, br2);

					mws += double(r2.width());
					mhs += double(r2.height());

					asmples += 1.0;
				}
			}

			mws /=  asmples;
			mhs /= asmples;
			double mhs_max = mhs * 1.1;
			double mhs_min = mhs * 0.9;

//#define DBG_EXPORT_BOXES
#ifdef DBG_EXPORT_BOXES

			QImage dbgImage(dbgImage0.copy());
			QPen pen(Qt::blue);
			QPen bpen(Qt::red);
			QPainter painter(&dbgImage);
			painter.scale(dbgScale, dbgScale);
			painter.setBrush(Qt::NoBrush);
			painter.setPen(pen);
#endif
			for(int j(i+1); j < result; ++j)
			{
				if(iCount >= 255)
					break;
				QChar c2(tc.at(j));
				if(c == c2)
				{
					QPoint tl2(x0[j], iHeight - y1[j]);
					QPoint br2(y0[j],  iHeight - x1[j]);
					QRect r2(tl2, br2);
#ifdef DBG_EXPORT_BOXES
					painter.drawRect(r2);
					painter.setPen(bpen);
					painter.drawLine(QPoint(r2.left(), ginfo.at(j).baseline),
							 QPoint(r2.right(), ginfo.at(j).baseline));
					painter.setPen(pen);
#endif
					if(double(r2.height()) < mhs_max && double(r2.height()) > mhs_min)
					{
						if(r2.width() > r.width())
						{
							r.setWidth(qMax(r.width(), r2.width()));
//							r.setLeft(r.left() - ((r2.width()- r.width())/2));
						}
						if(r2.height() > r.height())
						{
							r.setHeight(qMax(r.height(), r2.height()));
//							r.setTop(r.top() - ((r2.top() - r2.top())/2));
						}
					}
					++iCount;
				}
			}
#ifdef DBG_EXPORT_BOXES
			painter.end();
			dbgImage.save(QString("%1_BOXES.png").arg(c));
#endif
			qDebug()<<"Processing character:"<<c<<"; samples:"<<iCount;
			QImage ref(r.size(),QImage::Format_ARGB32);
			ref.fill(0xFFFFFFFF);
			int rwidth(r.width());
			int rheight(r.height());
			QList<double> xheights;
			QList<int> baselines;



				for(int j(i), nSamples(0); j < result && nSamples < 255; ++j)
				{
					QChar c2(tc.at(j));
					if(c == c2)
					{
						++nSamples;
						QPoint tl2(x0[j], iHeight - y1[j]);
						QPoint br2(y0[j],  iHeight - x1[j]);
						QRect r2(tl2, br2);
						if(double(r2.height()) < mhs_max && double(r2.height()) > mhs_min)
						{
							double vScale(double(r2.height()) / double(r.height()));

							if(c == QChar('a'))
								qDebug()<<"max:"<<mhs_max<<"min:"<<mhs_min<<"height:"<<double(r2.height())<<"rheight:"<<double(r.height());


							baselines.append(qRound(double(ginfo.at(j).baseline - r2.bottom()) * vScale));
							xheights.append(double(ginfo.at(j).xheight)* vScale);
							QImage cImg(m_img.copy(r2).scaled(r.size(), Qt::IgnoreAspectRatio, Qt::SmoothTransformation));
							for(int y(0); y < r.height() ; ++y)
							{
								for(int x(0); x < r.width(); ++x)
								{
									int refX(x);
									int refY(y);
									int valR2(qGray(cImg.pixel(x,y)) < 128 ? 255 / iCount : 0);
									// int valR2((255 - qGray(cImg.pixel(x,y))) / iCount);
									int valRef(qGray(ref.pixel(refX, refY)));
									int newVal(qMax(0, valRef - valR2));
									int newColor(QColor(newVal, newVal, newVal, 255).rgba());
									ref.setPixel(refX, refY, newColor);
								}
							}
						}

					}
				}
//				ref.save(QString("ref_%1.png").arg(c.unicode()));
			//			else
			//			{
			//				for(int j(i), nSamples(0); j < result && nSamples < 256; ++j)
			//				{
			//					QChar c2(tc.at(j));
			//					if(c == c2)
			//					{
			//						++nSamples;
			//						QPoint tl2(x0[j], iHeight - y1[j]);
			//						QPoint br2(y0[j],  iHeight - x1[j]);
			//						QRect r2(tl2, br2);
			//						// center extracted image on ref;
			//						int remainX((rwidth - r2.width()) / 2);
			//						int remainY((rheight - r2.height()) /*/ 2*/);
			//						//					r2.translate(remainX / 2, remainY / 2);
			//						for(int y(r2.y()); y < r2.y() + r2.height(); ++y)
			//						{
			//							for(int x(r2.x()); x < r2.x() + r2.width(); ++x)
			//							{
			//								int refX(x - r2.left() + remainX);
			//								int refY(y - r2.top() + remainY);
			//								int valR2((255 - qGray(m_img.pixel(x,y))) / iCount);
			//								int valRef(qGray(ref.pixel(refX, refY)));
			//								int newVal(qMax(0, valRef - valR2));
			//								int newColor(QColor(newVal, newVal, newVal, 255).rgba());
			//								if(firstDbg)
			//									qDebug()<<refX<<refY<<newColor;
			//								ref.setPixel(refX, refY, newColor);
			//							}
			//						}
			//						firstDbg = false;

			//					}
			//				}
			//			}

			double adjustedBaseline(0);
			foreach(int b, baselines)
				adjustedBaseline += b;
			adjustedBaseline /= double(baselines.count());

			double adjustedXheight(0);
			foreach(double d, xheights)
				adjustedXheight += d;
			adjustedXheight /= double(xheights.count());

			xheightDict.insert(c,adjustedXheight);
			iDict.insert(c, ref);
			offsetDict.insert(c, qRound(adjustedBaseline));
		}
		//		lists[QChar(text[i])].append(m_img.copy(r));
	}
	//	foreach(const QChar& c, lists.keys())
	//	{
	//		QString fname(QString("%1.png").arg(c.unicode()));
	////		bool is = lists.value(c).first().save(fname.toLocal8Bit());
	////		qDebug()<<"Save:"<<fname ;

	//		QSize maxSize;
	//		foreach(const QImage& im, lists.value(c))
	//		{
	//			maxSize = QSize(qMax(im.size().width(), maxSize.width()),
	//				qMax(im.size().height(), maxSize.height()));
	//		}

	//		QImage t(maxSize, QImage::Format_ARGB32);
	//		t.fill(0xFFFFFFFF);
	//		int icount(lists.value(c).count());
	//		foreach(const QImage& im, lists.value(c))
	//		{
	//			QImage ref = im.scaled(maxSize);
	//			for(int y(0); y < maxSize.height(); ++y)
	//			{
	//				for(int x(0); x <maxSize.width(); ++x)
	//				{
	//					int valRef((255 - qGray(ref.pixel(x,y))) / icount);
	//					int valT(qGray(t.pixel(x,y)));
	//					int newVal(valT - valRef);
	//					int newColor(QColor(newVal, newVal, newVal, 255).rgba());
	////					qDebug()<<valRef<<valT<<newVal<<newColor;
	//					t.setPixel(x,y, newColor);
	//				}
	//			}

	//		}
	//		iDict.insert(c, t);
	//		t.save(QString("%1.png").arg(c.unicode()));

	//	}



}