/**
 * Copyright @ 2012 Quan Nguyen
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package net.sourceforge.tess4j;
import static org.junit.Assert.assertArrayEquals;
import java.awt.image.BufferedImage;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.nio.ByteBuffer;
import java.nio.FloatBuffer;
import java.nio.IntBuffer;
import java.util.Arrays;
import javax.imageio.ImageIO;
import net.sourceforge.tess4j.util.ImageIOHelper;
import net.sourceforge.tess4j.util.LoggHelper;
import net.sourceforge.tess4j.util.Utils;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import com.ochafik.lang.jnaerator.runtime.NativeSize;
import com.sun.jna.NativeLong;
import com.sun.jna.Pointer;
import com.sun.jna.StringArray;
import com.sun.jna.ptr.PointerByReference;
import net.sourceforge.lept4j.Box;
import net.sourceforge.lept4j.Boxa;
import static net.sourceforge.lept4j.ILeptonica.L_CLONE;
import net.sourceforge.lept4j.Leptonica;
import net.sourceforge.lept4j.Pix;
import net.sourceforge.lept4j.util.LeptUtils;
import net.sourceforge.tess4j.ITessAPI.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static net.sourceforge.tess4j.ITessAPI.FALSE;
import static net.sourceforge.tess4j.ITessAPI.TRUE;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
public class TessAPITest {
// Logger for this test class; its name is supplied by LoggHelper.
private static final Logger logger = LoggerFactory.getLogger(new LoggHelper().toString());
// Data path argument handed to the TessBaseAPIInit3/Init4 calls in the tests.
private final String datapath = ".";
// Parent directory of the sample images (eurotext.tif / eurotext.png) the tests load.
private final String testResourcesDataPath = "test/resources/test-data";
// Language argument handed to the TessBaseAPIInit3/Init4 calls.
String language = "eng";
// Reference text that the OCR tests take as their expected result.
String expOCRResult = "The (quick) [brown] {fox} jumps!\nOver the $43,456.78 <lazy> #90 dog";
// API binding under test; assigned in setUp().
TessAPI api;
// Engine handle obtained from api.TessBaseAPICreate() in setUp().
TessBaseAPI handle;
public static void setUpClass() throws Exception {
public static void tearDownClass() throws Exception {
public void setUp() {
api = new TessAPIImpl().getInstance();
handle = api.TessBaseAPICreate();
public void tearDown() {
* Test of TessBaseAPIRect method, of class TessAPI.
* @throws java.lang.Exception
public void testTessBaseAPIRect() throws Exception {
String expResult = expOCRResult;
File tiff = new File(testResourcesDataPath, "eurotext.tif");
BufferedImage image = ImageIO.read(tiff); // require jai-imageio lib to read TIFF
ByteBuffer buf = ImageIOHelper.convertImageData(image);
int bpp = image.getColorModel().getPixelSize();
int bytespp = bpp / 8;
int bytespl = (int) Math.ceil(image.getWidth() * bpp / 8.0);
api.TessBaseAPIInit3(handle, datapath, language);
api.TessBaseAPISetPageSegMode(handle, TessPageSegMode.PSM_AUTO);
Pointer utf8Text = api.TessBaseAPIRect(handle, buf, bytespp, bytespl, 90, 50, 862, 614);
String result = utf8Text.getString(0);
* Test of TessBaseAPIGetUTF8Text method, of class TessAPI.
* @throws java.lang.Exception
public void testTessBaseAPIGetUTF8Text() throws Exception {
String expResult = expOCRResult;
File tiff = new File(testResourcesDataPath, "eurotext.tif");
BufferedImage image = ImageIO.read(new FileInputStream(tiff)); // require jai-imageio lib to read TIFF
ByteBuffer buf = ImageIOHelper.convertImageData(image);
int bpp = image.getColorModel().getPixelSize();
int bytespp = bpp / 8;
int bytespl = (int) Math.ceil(image.getWidth() * bpp / 8.0);
api.TessBaseAPIInit3(handle, datapath, language);
api.TessBaseAPISetPageSegMode(handle, TessPageSegMode.PSM_AUTO);
api.TessBaseAPISetImage(handle, buf, image.getWidth(), image.getHeight(), bytespp, bytespl);
api.TessBaseAPISetRectangle(handle, 90, 50, 862, 614);
Pointer utf8Text = api.TessBaseAPIGetUTF8Text(handle);
String result = utf8Text.getString(0);
* Test of TessBaseAPIGetUTF8Text method, of class TessAPI.
* @throws java.lang.Exception
public void testTessBaseAPIGetUTF8Text_Pix() throws Exception {
String expResult = expOCRResult;
File tiff = new File(testResourcesDataPath, "eurotext.tif");
Leptonica leptInstance = Leptonica.INSTANCE;
Pix pix = leptInstance.pixRead(tiff.getPath());
api.TessBaseAPIInit3(handle, datapath, language);
api.TessBaseAPISetImage2(handle, pix);
Pointer utf8Text = api.TessBaseAPIGetUTF8Text(handle);
String result = utf8Text.getString(0);
//release Pix resource
PointerByReference pRef = new PointerByReference();
* Test of TessBaseAPIGetComponentImages method, of class TessAPI.
* @throws java.lang.Exception
public void testTessBaseAPIGetComponentImages() throws Exception {
File image = new File(testResourcesDataPath, "eurotext.png");
int expResult = 12; // number of lines in the test image
Leptonica leptInstance = Leptonica.INSTANCE;
Pix pix = leptInstance.pixRead(image.getPath());
api.TessBaseAPIInit3(handle, datapath, language);
api.TessBaseAPISetImage2(handle, pix);
PointerByReference pixa = null;
PointerByReference blockids = null;
Boxa boxes = api.TessBaseAPIGetComponentImages(handle, TessPageIteratorLevel.RIL_TEXTLINE, TRUE, pixa, blockids);
// boxes = api.TessBaseAPIGetRegions(handle, pixa); // equivalent to TessPageIteratorLevel.RIL_BLOCK
int boxCount = leptInstance.boxaGetCount(boxes);
for (int i = 0; i < boxCount; i++) {
Box box = leptInstance.boxaGetBox(boxes, i, L_CLONE);
if (box == null) {
api.TessBaseAPISetRectangle(handle, box.x, box.y, box.w, box.h);
Pointer utf8Text = api.TessBaseAPIGetUTF8Text(handle);
String ocrResult = utf8Text.getString(0);
int conf = api.TessBaseAPIMeanTextConf(handle);
System.out.print(String.format("Box[%d]: x=%d, y=%d, w=%d, h=%d, confidence: %d, text: %s", i, box.x, box.y, box.w, box.h, conf, ocrResult));
// release Pix and Boxa resources
assertEquals(expResult, boxCount);
* Test of TessVersion method, of class TessAPI.
public void testTessVersion() {
String expResult = "3.05.01";
String result = api.TessVersion();
* Test of TessBaseAPIGetBoolVariable method, of class TessAPI.
public void testTessBaseAPIGetBoolVariable() {
String name = "tessedit_create_hocr";
api.TessBaseAPISetVariable(handle, name, "1");
IntBuffer value = IntBuffer.allocate(1);
int result = -1;
if (api.TessBaseAPIGetBoolVariable(handle, "tessedit_create_hocr", value) == TRUE) {
result = value.get(0);
int expResult = 1;
assertEquals(expResult, result);
* Test of TessBaseAPIPrintVariables method, of class TessAPI.
* @throws java.lang.Exception
public void testTessBaseAPIPrintVariablesToFile() throws Exception {
String var = "tessedit_char_whitelist";
String value = "0123456789";
api.TessBaseAPISetVariable(handle, var, value);
String filename = "printvar.txt";
api.TessBaseAPIPrintVariablesToFile(handle, filename); // will crash if not invoked after some method
File file = new File(filename);
BufferedReader input = new BufferedReader(new FileReader(file));
StringBuilder strB = new StringBuilder();
String line;
String EOL = System.getProperty("line.separator");
while ((line = input.readLine()) != null) {
assertTrue(strB.toString().contains(var + "\t" + value));
* Test of TessBaseAPIInit4 method, of class TessAPI.
public void testTessBaseAPIInit4() {
int oem = TessOcrEngineMode.OEM_DEFAULT;
PointerByReference configs = null; //new PointerByReference();
int configs_size = 0;
// disable loading dictionaries
String[] args = new String[]{"load_system_dawg", "load_freq_dawg"};
StringArray sarray = new StringArray(args);
PointerByReference vars_vec = new PointerByReference();
args = new String[]{"F", "F"};
sarray = new StringArray(args);
PointerByReference vars_values = new PointerByReference();
NativeSize vars_vec_size = new NativeSize(args.length);
int expResult = 0;
int result = api.TessBaseAPIInit4(handle, datapath, language, oem, configs, configs_size, vars_vec, vars_values, vars_vec_size, FALSE);
assertEquals(expResult, result);
* Test of TessBaseAPIGetInitLanguagesAsString method, of class TessAPI.
public void testTessBaseAPIGetInitLanguagesAsString() {
String expResult = "";
String result = api.TessBaseAPIGetInitLanguagesAsString(handle);
assertEquals(expResult, result);
* Test of TessBaseAPIGetLoadedLanguagesAsVector method, of class TessAPI.
public void testTessBaseAPIGetLoadedLanguagesAsVector() {
api.TessBaseAPIInit3(handle, datapath, language);
String[] expResult = {"eng"};
String[] result = api.TessBaseAPIGetLoadedLanguagesAsVector(handle).getPointer().getStringArray(0);
assertArrayEquals(expResult, result);
* Test of TessBaseAPIGetAvailableLanguagesAsVector method, of class
* TessAPI.
public void testTessBaseAPIGetAvailableLanguagesAsVector() {
api.TessBaseAPIInit3(handle, datapath, language);
String[] expResult = {"eng"};
String[] result = api.TessBaseAPIGetAvailableLanguagesAsVector(handle).getPointer().getStringArray(0);
* Test of TessBaseAPIGetHOCRText method, of class TessAPI.
* @throws java.lang.Exception
public void testTessBaseAPIGetHOCRText() throws Exception {
File tiff = new File(testResourcesDataPath, "eurotext.tif");
BufferedImage image = ImageIO.read(new FileInputStream(tiff)); // require jai-imageio lib to read TIFF
ByteBuffer buf = ImageIOHelper.convertImageData(image);
int bpp = image.getColorModel().getPixelSize();
int bytespp = bpp / 8;
int bytespl = (int) Math.ceil(image.getWidth() * bpp / 8.0);
api.TessBaseAPISetPageSegMode(handle, TessPageSegMode.PSM_AUTO);
api.TessBaseAPIInit3(handle, datapath, language);
api.TessBaseAPISetImage(handle, buf, image.getWidth(), image.getHeight(), bytespp, bytespl);
int page_number = 0;
Pointer utf8Text = api.TessBaseAPIGetHOCRText(handle, page_number);
String result = utf8Text.getString(0);
assertTrue(result.contains("<div class='ocr_page'"));
* Test of TessBaseAPIAnalyseLayout method, of class TessAPI.
* @throws java.lang.Exception
public void testTessBaseAPIAnalyseLayout() throws Exception {
File image = new File(testResourcesDataPath, "eurotext.png");
int expResult = 12; // number of lines in the test image
Leptonica leptInstance = Leptonica.INSTANCE;
Pix pix = leptInstance.pixRead(image.getPath());
api.TessBaseAPIInit3(handle, datapath, language);
api.TessBaseAPISetImage2(handle, pix);
int pageIteratorLevel = TessPageIteratorLevel.RIL_TEXTLINE;
logger.info("PageIteratorLevel: " + Utils.getConstantName(pageIteratorLevel, TessPageIteratorLevel.class));
int i = 0;
TessPageIterator pi = api.TessBaseAPIAnalyseLayout(handle);
do {
IntBuffer leftB = IntBuffer.allocate(1);
IntBuffer topB = IntBuffer.allocate(1);
IntBuffer rightB = IntBuffer.allocate(1);
IntBuffer bottomB = IntBuffer.allocate(1);
api.TessPageIteratorBoundingBox(pi, pageIteratorLevel, leftB, topB, rightB, bottomB);
int left = leftB.get();
int top = topB.get();
int right = rightB.get();
int bottom = bottomB.get();
logger.info(String.format("Box[%d]: x=%d, y=%d, w=%d, h=%d", i++, left, top, right - left, bottom - top));
} while (api.TessPageIteratorNext(pi, pageIteratorLevel) == TRUE);
assertEquals(expResult, i);
* Test of TessBaseAPIDetectOrientationScript method, of class TessAPI.
* @throws java.lang.Exception
public void testTessBaseAPIDetectOrientationScript() throws Exception {
File image = new File(testResourcesDataPath, "eurotext.png");
int expResult = TRUE;
Leptonica leptInstance = Leptonica.INSTANCE;
Pix pix = leptInstance.pixRead(image.getPath());
api.TessBaseAPIInit3(handle, datapath, language);
api.TessBaseAPISetImage2(handle, pix);
IntBuffer orient_degB = IntBuffer.allocate(1);
FloatBuffer orient_confB = FloatBuffer.allocate(1);
PointerByReference script_nameB = new PointerByReference();
FloatBuffer script_confB = FloatBuffer.allocate(1);
int result = api.TessBaseAPIDetectOrientationScript(handle, orient_degB, orient_confB, script_nameB, script_confB);
if (result == TRUE) {
int orient_deg = orient_degB.get();
float orient_conf = orient_confB.get();
String script_name = script_nameB.getValue().getString(0);
float script_conf = script_confB.get();
logger.info(String.format("OrientationScript: orient_deg=%d, orient_conf=%f, script_name=%s, script_conf=%f", orient_deg, orient_conf, script_name, script_conf));
PointerByReference pRef = new PointerByReference();
assertEquals(expResult, result);
* Test of Orientation and script detection (OSD).
* @throws java.lang.Exception
public void testOSD() throws Exception {
int expResult = TessPageSegMode.PSM_AUTO_OSD;
IntBuffer orientation = IntBuffer.allocate(1);
IntBuffer direction = IntBuffer.allocate(1);
IntBuffer order = IntBuffer.allocate(1);
FloatBuffer deskew_angle = FloatBuffer.allocate(1);
File tiff = new File(testResourcesDataPath, "eurotext.tif");
BufferedImage image = ImageIO.read(new FileInputStream(tiff)); // require jai-imageio lib to read TIFF
ByteBuffer buf = ImageIOHelper.convertImageData(image);
int bpp = image.getColorModel().getPixelSize();
int bytespp = bpp / 8;
int bytespl = (int) Math.ceil(image.getWidth() * bpp / 8.0);
api.TessBaseAPIInit3(handle, datapath, language);
api.TessBaseAPISetPageSegMode(handle, TessPageSegMode.PSM_AUTO_OSD);
int actualResult = api.TessBaseAPIGetPageSegMode(handle);
logger.info("PSM: " + Utils.getConstantName(actualResult, TessPageSegMode.class));
api.TessBaseAPISetImage(handle, buf, image.getWidth(), image.getHeight(), bytespp, bytespl);
int success = api.TessBaseAPIRecognize(handle, null);
if (success == 0) {
TessPageIterator pi = api.TessBaseAPIAnalyseLayout(handle);
api.TessPageIteratorOrientation(pi, orientation, direction, order, deskew_angle);
"Orientation: %s\nWritingDirection: %s\nTextlineOrder: %s\nDeskew angle: %.4f\n",
Utils.getConstantName(orientation.get(), TessOrientation.class),
Utils.getConstantName(direction.get(), TessWritingDirection.class),
Utils.getConstantName(order.get(), TessTextlineOrder.class),
assertEquals(expResult, actualResult);
* Test of ResultIterator and PageIterator.
* @throws Exception
public void testResultIterator() throws Exception {
File tiff = new File(testResourcesDataPath, "eurotext.tif");
BufferedImage image = ImageIO.read(new FileInputStream(tiff)); // require jai-imageio lib to read TIFF
ByteBuffer buf = ImageIOHelper.convertImageData(image);
int bpp = image.getColorModel().getPixelSize();
int bytespp = bpp / 8;
int bytespl = (int) Math.ceil(image.getWidth() * bpp / 8.0);
api.TessBaseAPIInit3(handle, datapath, language);
api.TessBaseAPISetPageSegMode(handle, TessPageSegMode.PSM_AUTO);
api.TessBaseAPISetImage(handle, buf, image.getWidth(), image.getHeight(), bytespp, bytespl);
ETEXT_DESC monitor = new ETEXT_DESC();
TimeVal timeout = new TimeVal();
timeout.tv_sec = new NativeLong(0L); // time > 0 causes blank ouput
monitor.end_time = timeout;
ProgressMonitor pmo = new ProgressMonitor(monitor);
api.TessBaseAPIRecognize(handle, monitor);
logger.info("Message: " + pmo.getMessage());
TessResultIterator ri = api.TessBaseAPIGetIterator(handle);
TessPageIterator pi = api.TessResultIteratorGetPageIterator(ri);
logger.info("Bounding boxes:\nchar(s) left top right bottom confidence font-attributes");
int level = TessPageIteratorLevel.RIL_WORD;
// int height = image.getHeight();
do {
Pointer ptr = api.TessResultIteratorGetUTF8Text(ri, level);
String word = ptr.getString(0);
float confidence = api.TessResultIteratorConfidence(ri, level);
IntBuffer leftB = IntBuffer.allocate(1);
IntBuffer topB = IntBuffer.allocate(1);
IntBuffer rightB = IntBuffer.allocate(1);
IntBuffer bottomB = IntBuffer.allocate(1);
api.TessPageIteratorBoundingBox(pi, level, leftB, topB, rightB, bottomB);
int left = leftB.get();
int top = topB.get();
int right = rightB.get();
int bottom = bottomB.get();
System.out.print(String.format("%s %d %d %d %d %f", word, left, top, right, bottom, confidence));
// logger.info(String.format("%s %d %d %d %d", str, left, height - bottom, right, height - top)); //
// training box coordinates
IntBuffer boldB = IntBuffer.allocate(1);
IntBuffer italicB = IntBuffer.allocate(1);
IntBuffer underlinedB = IntBuffer.allocate(1);
IntBuffer monospaceB = IntBuffer.allocate(1);
IntBuffer serifB = IntBuffer.allocate(1);
IntBuffer smallcapsB = IntBuffer.allocate(1);
IntBuffer pointSizeB = IntBuffer.allocate(1);
IntBuffer fontIdB = IntBuffer.allocate(1);
String fontName = api.TessResultIteratorWordFontAttributes(ri, boldB, italicB, underlinedB, monospaceB,
serifB, smallcapsB, pointSizeB, fontIdB);
boolean bold = boldB.get() == TRUE;
boolean italic = italicB.get() == TRUE;
boolean underlined = underlinedB.get() == TRUE;
boolean monospace = monospaceB.get() == TRUE;
boolean serif = serifB.get() == TRUE;
boolean smallcaps = smallcapsB.get() == TRUE;
int pointSize = pointSizeB.get();
int fontId = fontIdB.get();
logger.info(String.format(" font: %s, size: %d, font id: %d, bold: %b,"
+ " italic: %b, underlined: %b, monospace: %b, serif: %b, smallcap: %b", fontName, pointSize,
fontId, bold, italic, underlined, monospace, serif, smallcaps));
} while (api.TessPageIteratorNext(pi, level) == TRUE);
* Test of ChoiceIterator.
* @throws Exception
public void testChoiceIterator() throws Exception {
File tiff = new File(testResourcesDataPath, "eurotext.tif");
BufferedImage image = ImageIO.read(new FileInputStream(tiff)); // require jai-imageio lib to read TIFF
ByteBuffer buf = ImageIOHelper.convertImageData(image);
int bpp = image.getColorModel().getPixelSize();
int bytespp = bpp / 8;
int bytespl = (int) Math.ceil(image.getWidth() * bpp / 8.0);
api.TessBaseAPIInit3(handle, datapath, language);
api.TessBaseAPISetImage(handle, buf, image.getWidth(), image.getHeight(), bytespp, bytespl);
api.TessBaseAPISetVariable(handle, "save_blob_choices", "T");
api.TessBaseAPISetRectangle(handle, 37, 228, 548, 31);
ETEXT_DESC monitor = new ETEXT_DESC();
ProgressMonitor pmo = new ProgressMonitor(monitor);
api.TessBaseAPIRecognize(handle, monitor);
logger.info("Message: " + pmo.getMessage());
TessResultIterator ri = api.TessBaseAPIGetIterator(handle);
int level = TessPageIteratorLevel.RIL_SYMBOL;
if (ri != null) {
do {
Pointer symbol = api.TessResultIteratorGetUTF8Text(ri, level);
float conf = api.TessResultIteratorConfidence(ri, level);
if (symbol != null) {
logger.info(String.format("symbol %s, conf: %f", symbol.getString(0), conf));
boolean indent = false;
TessChoiceIterator ci = api.TessResultIteratorGetChoiceIterator(ri);
do {
if (indent) {
System.out.print("\t- ");
String choice = api.TessChoiceIteratorGetUTF8Text(ci);
logger.info(String.format("%s conf: %f", choice, api.TessChoiceIteratorConfidence(ci)));
indent = true;
} while (api.TessChoiceIteratorNext(ci) == ITessAPI.TRUE);
} while (api.TessResultIteratorNext(ri, level) == ITessAPI.TRUE);
* Test of ResultRenderer method, of class TessAPI.
* @throws java.lang.Exception
public void testResultRenderer() throws Exception {
String image = String.format("%s/%s", testResourcesDataPath, "eurotext.tif");
String output = "capi-test.txt";
int set_only_init_params = FALSE;
int oem = TessOcrEngineMode.OEM_DEFAULT;
PointerByReference configs = null;
int configs_size = 0;
String[] params = {"load_system_dawg", "tessedit_char_whitelist"};
String vals[] = {"F", ""}; //0123456789-.IThisalotfpnex
PointerByReference vars_vec = new PointerByReference();
vars_vec.setPointer(new StringArray(params));
PointerByReference vars_values = new PointerByReference();
vars_values.setPointer(new StringArray(vals));
NativeSize vars_vec_size = new NativeSize(params.length);
api.TessBaseAPISetOutputName(handle, output);
int rc = api.TessBaseAPIInit4(handle, datapath, language,
oem, configs, configs_size, vars_vec, vars_values, vars_vec_size, set_only_init_params);
if (rc != 0) {
logger.error("Could not initialize tesseract.");
String outputbase = "test/test-results/outputbase";
TessResultRenderer renderer = api.TessHOcrRendererCreate(outputbase);
api.TessResultRendererInsert(renderer, api.TessBoxTextRendererCreate(outputbase));
api.TessResultRendererInsert(renderer, api.TessTextRendererCreate(outputbase));
String dataPath = api.TessBaseAPIGetDatapath(handle);
api.TessResultRendererInsert(renderer, api.TessPDFRendererCreate(outputbase, dataPath));
int result = api.TessBaseAPIProcessPages(handle, image, null, 0, renderer);
if (result == FALSE) {
logger.error("Error during processing.");
for (; renderer != null; renderer = api.TessResultRendererNext(renderer)) {
String ext = api.TessResultRendererExtention(renderer).getString(0);
logger.info(String.format("TessResultRendererExtention: %s\nTessResultRendererTitle: %s\nTessResultRendererImageNum: %d",
assertTrue(new File(outputbase + ".pdf").exists());