Update arborescence 2

This commit is contained in:
Louis Calas
2019-10-10 18:01:08 +02:00
parent 29dba04efb
commit a0b8e315db
167 changed files with 3 additions and 5 deletions

View File

@@ -0,0 +1,9 @@
# Set root logger level to DEBUG and its only appender to A1.
log4j.rootLogger=DEBUG, A1
# A1 is set to be a ConsoleAppender.
log4j.appender.A1=org.apache.log4j.ConsoleAppender
# A1 uses PatternLayout.
log4j.appender.A1.layout=org.apache.log4j.PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n

View File

@@ -0,0 +1,75 @@
/**
* Copyright @ 2014 Quan Nguyen
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package net.sourceforge.tess4j;
import com.sun.jna.Pointer;
import net.sourceforge.tess4j.util.LoggHelper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static net.sourceforge.tess4j.ITessAPI.TRUE;
/**
 * Thread that polls an {@code ITessAPI.ETEXT_DESC} structure roughly every
 * 100 ms, logging its {@code ocr_alive} and {@code progress} fields and
 * collecting its {@code more_to_come} value, until {@code progress} reaches
 * 100 or the thread is interrupted.
 */
class ProgressMonitor extends Thread {

    private static final Logger logger = LoggerFactory.getLogger(new LoggHelper().toString());

    /** Pause between two polls of the progress structure, in milliseconds. */
    private static final long POLL_INTERVAL_MILLIS = 100L;

    /** Progress structure observed by this thread. */
    ITessAPI.ETEXT_DESC monitor;

    /**
     * Receives the {@code more_to_come} value on every poll. It is written by
     * this thread and read by callers of {@link #getMessage()}, so access
     * inside this class is synchronized on the builder itself.
     */
    StringBuilder outputMessage = new StringBuilder();

    /**
     * Creates a monitor for the given progress structure.
     *
     * @param monitor structure to poll once the thread is started
     */
    public ProgressMonitor(ITessAPI.ETEXT_DESC monitor) {
        this.monitor = monitor;
    }

    /**
     * Returns everything appended to the message buffer so far.
     *
     * @return snapshot of the collected output
     */
    public String getMessage() {
        synchronized (outputMessage) {
            return outputMessage.toString();
        }
    }

    /**
     * Polls the structure until {@code progress >= 100}. An interrupt ends
     * the loop and leaves the interrupt flag set; any other exception is
     * logged and also ends the loop.
     */
    @Override
    public void run() {
        try {
            while (true) {
                logger.info("ocr alive: {}", monitor.ocr_alive == TRUE);
                logger.info("progress: {}", monitor.progress);
                synchronized (outputMessage) {
                    outputMessage.append(monitor.more_to_come);
                }
                if (monitor.progress >= 100) {
                    break;
                }
                Thread.sleep(POLL_INTERVAL_MILLIS);
            }
        } catch (InterruptedException interrupted) {
            // Restore the flag so whoever owns this thread can still observe the interrupt.
            Thread.currentThread().interrupt();
        } catch (Exception e) {
            logger.error("Progress monitoring aborted", e);
        }
    }

    /**
     * Cancels OCR operation by installing a cancel callback that always
     * returns {@code true}.
     */
    public void cancel() {
        monitor.cancel = new ITessAPI.CANCEL_FUNC() {
            @Override
            public boolean invoke(Pointer cancel_this, int words) {
                return true;
            }
        };
    }

    /**
     * Resets cancel flag by removing the cancel callback.
     */
    public void reset() {
        monitor.cancel = null;
    }
}

View File

@@ -0,0 +1,645 @@
/**
* Copyright @ 2012 Quan Nguyen
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package net.sourceforge.tess4j;
import static org.junit.Assert.assertArrayEquals;
import java.awt.image.BufferedImage;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.nio.ByteBuffer;
import java.nio.FloatBuffer;
import java.nio.IntBuffer;
import java.util.Arrays;
import javax.imageio.ImageIO;
import net.sourceforge.tess4j.util.LoggHelper;
import net.sourceforge.tess4j.util.Utils;
import net.sourceforge.tess4j.util.ImageIOHelper;
import com.ochafik.lang.jnaerator.runtime.NativeSize;
import com.sun.jna.NativeLong;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import com.sun.jna.Pointer;
import com.sun.jna.StringArray;
import com.sun.jna.ptr.PointerByReference;
import net.sourceforge.lept4j.Box;
import net.sourceforge.lept4j.Boxa;
import static net.sourceforge.lept4j.ILeptonica.L_CLONE;
import net.sourceforge.lept4j.Leptonica;
import net.sourceforge.lept4j.Leptonica1;
import net.sourceforge.lept4j.Pix;
import net.sourceforge.lept4j.util.LeptUtils;
import net.sourceforge.tess4j.ITessAPI.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static net.sourceforge.tess4j.ITessAPI.FALSE;
import static net.sourceforge.tess4j.ITessAPI.TRUE;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
public class TessAPI1Test {
// Logger shared by all tests; its name is whatever LoggHelper#toString() yields.
private static final Logger logger = LoggerFactory.getLogger(new LoggHelper().toString());
// Data path argument passed to the TessBaseAPIInit* calls in the tests.
private final String datapath = ".";
// Directory from which the tests load the eurotext sample images.
private final String testResourcesDataPath = "test/resources/test-data";
// Language argument passed to the TessBaseAPIInit* calls in the tests.
String language = "eng";
// Text the OCR output of the eurotext sample is expected to start with.
String expOCRResult = "The (quick) [brown] {fox} jumps!\nOver the $43,456.78 <lazy> #90 dog";
// API handle; created in setUp() and deleted in tearDown().
TessBaseAPI handle;
/**
 * Class-level set-up hook; currently does nothing.
 *
 * @throws Exception declared for the hook signature; the empty body throws nothing
 */
@BeforeClass
public static void setUpClass() throws Exception {
}
/**
 * Class-level tear-down hook; currently does nothing.
 *
 * @throws Exception declared for the hook signature; the empty body throws nothing
 */
@AfterClass
public static void tearDownClass() throws Exception {
}
/**
 * Creates a fresh API handle before each test and stores it in {@code handle}.
 */
@Before
public void setUp() {
handle = TessAPI1.TessBaseAPICreate();
}
/**
 * Deletes the API handle created by {@code setUp()} after each test.
 */
@After
public void tearDown() {
TessAPI1.TessBaseAPIDelete(handle);
}
/**
 * Recognizes the whole {@code eurotext.tif} image through
 * {@code TessAPI1.TessBaseAPIRect} and checks that the returned text starts
 * with the expected sample text.
 *
 * @throws java.lang.Exception if the image cannot be read
 */
@Test
public void testTessBaseAPIRect() throws Exception {
    logger.info("TessBaseAPIRect");

    // Reading TIFF requires the jai-imageio library.
    BufferedImage img = ImageIO.read(new File(testResourcesDataPath, "eurotext.tif"));
    ByteBuffer pixels = ImageIOHelper.convertImageData(img);
    int width = img.getWidth();
    int height = img.getHeight();
    int bitsPerPixel = img.getColorModel().getPixelSize();
    int bytesPerPixel = bitsPerPixel / 8;
    int bytesPerLine = (int) Math.ceil(width * bitsPerPixel / 8.0);

    TessAPI1.TessBaseAPIInit3(handle, datapath, language);
    TessAPI1.TessBaseAPISetPageSegMode(handle, TessPageSegMode.PSM_AUTO);
    Pointer textPtr = TessAPI1.TessBaseAPIRect(handle, pixels, bytesPerPixel, bytesPerLine, 0, 0, width, height);
    String ocrText = textPtr.getString(0);
    TessAPI1.TessDeleteText(textPtr);
    logger.info(ocrText);

    // Only the leading part of the output is compared with the expectation.
    String actualPrefix = ocrText.substring(0, expOCRResult.length());
    assertEquals(expOCRResult, actualPrefix);
}
/**
 * Test of TessBaseAPIGetUTF8Text method, of class TessAPI1, using raw image
 * data supplied through {@code TessBaseAPISetImage}.
 *
 * @throws java.lang.Exception if the image cannot be read
 */
@Test
public void testTessBaseAPIGetUTF8Text() throws Exception {
    logger.info("TessBaseAPIGetUTF8Text");
    String expResult = expOCRResult;
    File tiff = new File(testResourcesDataPath, "eurotext.tif");
    // Read straight from the File: ImageIO.read(InputStream) leaves the stream
    // open, so the former FileInputStream was never closed.
    BufferedImage image = ImageIO.read(tiff); // require jai-imageio lib to read TIFF
    ByteBuffer buf = ImageIOHelper.convertImageData(image);
    int bpp = image.getColorModel().getPixelSize();
    int bytespp = bpp / 8;
    int bytespl = (int) Math.ceil(image.getWidth() * bpp / 8.0);
    TessAPI1.TessBaseAPIInit3(handle, datapath, language);
    TessAPI1.TessBaseAPISetPageSegMode(handle, TessPageSegMode.PSM_AUTO);
    TessAPI1.TessBaseAPISetImage(handle, buf, image.getWidth(), image.getHeight(), bytespp, bytespl);
    TessAPI1.TessBaseAPISetRectangle(handle, 0, 0, 1024, 800);
    Pointer utf8Text = TessAPI1.TessBaseAPIGetUTF8Text(handle);
    String result = utf8Text.getString(0);
    TessAPI1.TessDeleteText(utf8Text);
    logger.info(result);
    assertEquals(expResult, result.substring(0, expResult.length()));
}
/**
 * Test of TessBaseAPIGetUTF8Text method, of class TessAPI1, with the image
 * supplied as a Leptonica {@code Pix} through {@code TessBaseAPISetImage2}.
 *
 * @throws java.lang.Exception if any step of the test fails unexpectedly
 */
@Test
public void testTessBaseAPIGetUTF8Text_Pix() throws Exception {
    logger.info("TessBaseAPIGetUTF8Text_Pix");
    String expResult = expOCRResult;
    File tiff = new File(testResourcesDataPath, "eurotext.tif");
    Leptonica leptInstance = Leptonica.INSTANCE;
    Pix pix = leptInstance.pixRead(tiff.getPath());
    String result;
    try {
        TessAPI1.TessBaseAPIInit3(handle, datapath, language);
        TessAPI1.TessBaseAPISetImage2(handle, pix);
        Pointer utf8Text = TessAPI1.TessBaseAPIGetUTF8Text(handle);
        result = utf8Text.getString(0);
        TessAPI1.TessDeleteText(utf8Text);
        logger.info(result);
    } finally {
        // release Pix resource, even when one of the OCR calls above throws
        PointerByReference pRef = new PointerByReference();
        pRef.setValue(pix.getPointer());
        leptInstance.pixDestroy(pRef);
    }
    assertEquals(expResult, result.substring(0, expResult.length()));
}
/**
 * Asks {@code TessBaseAPIGetComponentImages} for the text-line boxes of
 * {@code eurotext.png}, runs OCR on each box and prints its geometry,
 * confidence and text, then checks the number of boxes returned.
 *
 * @throws java.lang.Exception if any step of the test fails unexpectedly
 */
@Test
public void testTessBaseAPIGetComponentImages() throws Exception {
    logger.info("TessBaseAPIGetComponentImages");
    final int expectedLines = 12; // number of lines in the test image
    File png = new File(testResourcesDataPath, "eurotext.png");
    Pix source = Leptonica1.pixRead(png.getPath());

    TessAPI1.TessBaseAPIInit3(handle, datapath, language);
    TessAPI1.TessBaseAPISetImage2(handle, source);

    PointerByReference pixa = null;
    PointerByReference blockids = null;
    // Original note: TessAPI1.TessBaseAPIGetRegions(handle, pixa) is the
    // equivalent call for TessPageIteratorLevel.RIL_BLOCK.
    Boxa lineBoxes = TessAPI1.TessBaseAPIGetComponentImages(handle, TessPageIteratorLevel.RIL_TEXTLINE, TRUE, pixa, blockids);

    int total = Leptonica1.boxaGetCount(lineBoxes);
    for (int idx = 0; idx < total; idx++) {
        Box current = Leptonica1.boxaGetBox(lineBoxes, idx, L_CLONE);
        if (current != null) {
            TessAPI1.TessBaseAPISetRectangle(handle, current.x, current.y, current.w, current.h);
            Pointer textPtr = TessAPI1.TessBaseAPIGetUTF8Text(handle);
            String text = textPtr.getString(0);
            TessAPI1.TessDeleteText(textPtr);
            int confidence = TessAPI1.TessBaseAPIMeanTextConf(handle);
            String report = String.format("Box[%d]: x=%d, y=%d, w=%d, h=%d, confidence: %d, text: %s", idx, current.x, current.y, current.w, current.h, confidence, text);
            System.out.print(report);
            LeptUtils.dispose(current);
        }
    }

    // release Pix and Boxa resources
    LeptUtils.dispose(source);
    LeptUtils.dispose(lineBoxes);

    assertEquals(expectedLines, total);
}
/**
 * Checks that the version string reported by {@code TessAPI1.TessVersion}
 * starts with the expected release number.
 */
@Test
public void testTessVersion() {
    logger.info("TessVersion");
    String version = TessAPI1.TessVersion();
    logger.info(version);
    boolean matches = version.startsWith("3.05.01");
    assertTrue(matches);
}
/**
 * Sets {@code tessedit_create_hocr} to "1" and checks that
 * {@code TessBaseAPIGetBoolVariable} reads it back as 1.
 */
@Test
public void testTessBaseAPIGetBoolVariable() {
    logger.info("TessBaseAPIGetBoolVariable");
    final String variable = "tessedit_create_hocr";
    TessAPI1.TessBaseAPISetVariable(handle, variable, "1");

    IntBuffer holder = IntBuffer.allocate(1);
    boolean found = TessAPI1.TessBaseAPIGetBoolVariable(handle, variable, holder) == TRUE;
    // -1 marks "lookup failed" so it can never be mistaken for a boolean value.
    int actual = found ? holder.get(0) : -1;

    assertEquals(1, actual);
}
/**
 * Test of TessBaseAPIPrintVariablesToFile method, of class TessAPI1: sets a
 * variable, dumps all variables to a file and checks the dump contains the
 * variable with the value that was set.
 *
 * @throws java.lang.Exception if the dump file cannot be read
 */
@Test
public void testTessBaseAPIPrintVariablesToFile() throws Exception {
    logger.info("TessBaseAPIPrintVariablesToFile");
    String var = "tessedit_char_whitelist";
    String value = "0123456789";
    TessAPI1.TessBaseAPISetVariable(handle, var, value);
    String filename = "printvar.txt";
    TessAPI1.TessBaseAPIPrintVariablesToFile(handle, filename); // will crash if not invoked after some method
    File file = new File(filename);
    StringBuilder strB = new StringBuilder();
    String EOL = System.getProperty("line.separator");
    try {
        BufferedReader input = new BufferedReader(new FileReader(file));
        try {
            String line;
            while ((line = input.readLine()) != null) {
                strB.append(line).append(EOL);
            }
        } finally {
            // Close the reader even when reading fails.
            input.close();
        }
    } finally {
        // Always remove the dump so a failed run leaves no stray file behind.
        file.delete();
    }
    assertTrue(strB.toString().contains(var + "\t" + value));
}
/**
 * Initializes the handle through {@code TessBaseAPIInit4} with the two
 * dictionary-loading variables set to "F" and expects a return code of 0.
 */
@Test
public void testTessBaseAPIInit4() {
    logger.info("TessBaseAPIInit4");

    // disable loading dictionaries
    String[] names = {"load_system_dawg", "load_freq_dawg"};
    String[] values = {"F", "F"};

    PointerByReference vars_vec = new PointerByReference();
    vars_vec.setPointer(new StringArray(names));
    PointerByReference vars_values = new PointerByReference();
    vars_values.setPointer(new StringArray(values));
    NativeSize vars_vec_size = new NativeSize(values.length);

    PointerByReference configs = null;
    int configs_size = 0;
    int engineMode = TessOcrEngineMode.OEM_DEFAULT;

    int returnCode = TessAPI1.TessBaseAPIInit4(handle, datapath, language, engineMode, configs, configs_size, vars_vec, vars_values, vars_vec_size, FALSE);
    assertEquals(0, returnCode);
}
/**
 * Expects {@code TessBaseAPIGetInitLanguagesAsString} to return an empty
 * string for a handle on which no init call has been made in this test.
 */
@Test
public void testTessBaseAPIGetInitLanguagesAsString() {
    logger.info("TessBaseAPIGetInitLanguagesAsString");
    String languages = TessAPI1.TessBaseAPIGetInitLanguagesAsString(handle);
    assertEquals("", languages);
}
/**
 * After initializing with the test language, expects
 * {@code TessBaseAPIGetLoadedLanguagesAsVector} to report exactly "eng".
 */
@Test
public void testTessBaseAPIGetLoadedLanguagesAsVector() {
    logger.info("TessBaseAPIGetLoadedLanguagesAsVector");
    TessAPI1.TessBaseAPIInit3(handle, datapath, language);

    Pointer vector = TessAPI1.TessBaseAPIGetLoadedLanguagesAsVector(handle).getPointer();
    String[] loaded = vector.getStringArray(0);

    assertArrayEquals(new String[]{"eng"}, loaded);
}
/**
 * After initializing with the test language, expects "eng" to be among the
 * languages reported by {@code TessBaseAPIGetAvailableLanguagesAsVector}.
 */
@Test
public void testTessBaseAPIGetAvailableLanguagesAsVector() {
    logger.info("TessBaseAPIGetAvailableLanguagesAsVector");
    TessAPI1.TessBaseAPIInit3(handle, datapath, language);

    Pointer vector = TessAPI1.TessBaseAPIGetAvailableLanguagesAsVector(handle).getPointer();
    String[] available = vector.getStringArray(0);

    // Other languages may be reported as well; only the presence of "eng" is required.
    boolean hasEnglish = Arrays.asList(available).contains("eng");
    assertTrue(hasEnglish);
}
/**
 * Test of TessBaseAPIGetHOCRText method, of class TessAPI1: the output for
 * page 0 must contain an {@code ocr_page} div.
 *
 * @throws java.lang.Exception if the image cannot be read
 */
@Test
public void testTessBaseAPIGetHOCRText() throws Exception {
    logger.info("TessBaseAPIGetHOCRText");
    File tiff = new File(testResourcesDataPath, "eurotext.tif");
    // Read straight from the File: ImageIO.read(InputStream) leaves the stream
    // open, so the former FileInputStream was never closed.
    BufferedImage image = ImageIO.read(tiff); // require jai-imageio lib to read TIFF
    ByteBuffer buf = ImageIOHelper.convertImageData(image);
    int bpp = image.getColorModel().getPixelSize();
    int bytespp = bpp / 8;
    int bytespl = (int) Math.ceil(image.getWidth() * bpp / 8.0);
    TessAPI1.TessBaseAPISetPageSegMode(handle, TessPageSegMode.PSM_AUTO);
    TessAPI1.TessBaseAPIInit3(handle, datapath, language);
    TessAPI1.TessBaseAPISetImage(handle, buf, image.getWidth(), image.getHeight(), bytespp, bytespl);
    int page_number = 0;
    Pointer utf8Text = TessAPI1.TessBaseAPIGetHOCRText(handle, page_number);
    String result = utf8Text.getString(0);
    TessAPI1.TessDeleteText(utf8Text);
    assertTrue(result.contains("<div class='ocr_page'"));
}
/**
 * Test of TessBaseAPIAnalyseLayout method, of class TessAPI1: walks the page
 * iterator at text-line level, logs every bounding box and checks how many
 * positions were visited.
 *
 * @throws java.lang.Exception if any step of the test fails unexpectedly
 */
@Test
public void testTessBaseAPIAnalyseLayout() throws Exception {
    logger.info("TessBaseAPIAnalyseLayout");
    File image = new File(testResourcesDataPath, "eurotext.png");
    int expResult = 12; // number of lines in the test image
    Leptonica leptInstance = Leptonica.INSTANCE;
    Pix pix = leptInstance.pixRead(image.getPath());
    TessAPI1.TessBaseAPIInit3(handle, datapath, language);
    TessAPI1.TessBaseAPISetImage2(handle, pix);
    int pageIteratorLevel = TessPageIteratorLevel.RIL_TEXTLINE;
    logger.info("PageIteratorLevel: " + Utils.getConstantName(pageIteratorLevel, TessPageIteratorLevel.class));
    int i = 0;
    TessPageIterator pi = TessAPI1.TessBaseAPIAnalyseLayout(handle);
    do {
        // Fresh buffers each round: the relative get() below advances their position.
        IntBuffer leftB = IntBuffer.allocate(1);
        IntBuffer topB = IntBuffer.allocate(1);
        IntBuffer rightB = IntBuffer.allocate(1);
        IntBuffer bottomB = IntBuffer.allocate(1);
        TessAPI1.TessPageIteratorBoundingBox(pi, pageIteratorLevel, leftB, topB, rightB, bottomB);
        int left = leftB.get();
        int top = topB.get();
        int right = rightB.get();
        int bottom = bottomB.get();
        logger.info(String.format("Box[%d]: x=%d, y=%d, w=%d, h=%d", i++, left, top, right - left, bottom - top));
    } while (TessAPI1.TessPageIteratorNext(pi, pageIteratorLevel) == TRUE);
    TessAPI1.TessPageIteratorDelete(pi);
    // release Pix resource; it was previously never freed in this test
    LeptUtils.dispose(pix);
    assertEquals(expResult, i);
}
/**
 * Runs {@code TessBaseAPIDetectOrientationScript} on {@code eurotext.png},
 * logs the detected values when the call reports success, and expects the
 * call to return {@code TRUE}.
 *
 * @throws java.lang.Exception if any step of the test fails unexpectedly
 */
@Test
public void testTessBaseAPIDetectOrientationScript() throws Exception {
    logger.info("TessBaseAPIDetectOrientationScript");
    File png = new File(testResourcesDataPath, "eurotext.png");
    Pix source = Leptonica1.pixRead(png.getPath());
    TessAPI1.TessBaseAPIInit3(handle, datapath, language);
    TessAPI1.TessBaseAPISetImage2(handle, source);

    // Output slots filled in by the detection call.
    IntBuffer degreesOut = IntBuffer.allocate(1);
    FloatBuffer degreesConfOut = FloatBuffer.allocate(1);
    PointerByReference scriptOut = new PointerByReference();
    FloatBuffer scriptConfOut = FloatBuffer.allocate(1);

    int detected = TessAPI1.TessBaseAPIDetectOrientationScript(handle, degreesOut, degreesConfOut, scriptOut, scriptConfOut);
    if (detected == TRUE) {
        int degrees = degreesOut.get();
        float degreesConf = degreesConfOut.get();
        String script = scriptOut.getValue().getString(0);
        float scriptConf = scriptConfOut.get();
        String summary = String.format("OrientationScript: orient_deg=%d, orient_conf=%f, script_name=%s, script_conf=%f", degrees, degreesConf, script, scriptConf);
        logger.info(summary);
    }

    // Release the Pix before asserting.
    PointerByReference pixRef = new PointerByReference();
    pixRef.setValue(source.getPointer());
    Leptonica1.pixDestroy(pixRef);

    assertEquals(TRUE, detected);
}
/**
 * Test of Orientation and script detection (OSD): sets the page segmentation
 * mode to {@code PSM_AUTO_OSD}, checks it reads back unchanged and, when
 * recognition succeeds, logs the orientation data of the page.
 *
 * @throws java.lang.Exception if the image cannot be read
 */
@Test
public void testOSD() throws Exception {
    logger.info("OSD");
    int expResult = TessPageSegMode.PSM_AUTO_OSD;
    IntBuffer orientation = IntBuffer.allocate(1);
    IntBuffer direction = IntBuffer.allocate(1);
    IntBuffer order = IntBuffer.allocate(1);
    FloatBuffer deskew_angle = FloatBuffer.allocate(1);
    File tiff = new File(testResourcesDataPath, "eurotext.tif");
    // Read straight from the File: ImageIO.read(InputStream) leaves the stream
    // open, so the former FileInputStream was never closed.
    BufferedImage image = ImageIO.read(tiff); // require jai-imageio lib to read TIFF
    ByteBuffer buf = ImageIOHelper.convertImageData(image);
    int bpp = image.getColorModel().getPixelSize();
    int bytespp = bpp / 8;
    int bytespl = (int) Math.ceil(image.getWidth() * bpp / 8.0);
    TessAPI1.TessBaseAPIInit3(handle, datapath, language);
    TessAPI1.TessBaseAPISetPageSegMode(handle, expResult);
    int actualResult = TessAPI1.TessBaseAPIGetPageSegMode(handle);
    logger.info("PSM: " + Utils.getConstantName(actualResult, TessPageSegMode.class));
    TessAPI1.TessBaseAPISetImage(handle, buf, image.getWidth(), image.getHeight(), bytespp, bytespl);
    int success = TessAPI1.TessBaseAPIRecognize(handle, null);
    if (success == 0) {
        TessPageIterator pi = TessAPI1.TessBaseAPIAnalyseLayout(handle);
        try {
            TessAPI1.TessPageIteratorOrientation(pi, orientation, direction, order, deskew_angle);
            logger.info(String.format(
                    "Orientation: %s\nWritingDirection: %s\nTextlineOrder: %s\nDeskew angle: %.4f\n",
                    Utils.getConstantName(orientation.get(), TessOrientation.class),
                    Utils.getConstantName(direction.get(), TessWritingDirection.class),
                    Utils.getConstantName(order.get(), TessTextlineOrder.class),
                    deskew_angle.get()));
        } finally {
            // The iterator was previously never deleted; free it as testTessBaseAPIAnalyseLayout does.
            TessAPI1.TessPageIteratorDelete(pi);
        }
    }
    assertEquals(expResult, actualResult);
}
/**
 * Test of ResultIterator and PageIterator: recognizes {@code eurotext.tif}
 * with a progress monitor attached, then walks the result word by word,
 * printing each word's text, bounding box and confidence and logging its font
 * attributes. The test only verifies that the walk completes.
 *
 * @throws Exception if the image cannot be read
 */
@Test
public void testResultIterator() throws Exception {
    logger.info("TessBaseAPIGetIterator");
    File tiff = new File(testResourcesDataPath, "eurotext.tif");
    // Read straight from the File: ImageIO.read(InputStream) leaves the stream
    // open, so the former FileInputStream was never closed.
    BufferedImage image = ImageIO.read(tiff); // require jai-imageio lib to read TIFF
    ByteBuffer buf = ImageIOHelper.convertImageData(image);
    int bpp = image.getColorModel().getPixelSize();
    int bytespp = bpp / 8;
    int bytespl = (int) Math.ceil(image.getWidth() * bpp / 8.0);
    TessAPI1.TessBaseAPIInit3(handle, datapath, language);
    TessAPI1.TessBaseAPISetPageSegMode(handle, TessPageSegMode.PSM_AUTO);
    TessAPI1.TessBaseAPISetImage(handle, buf, image.getWidth(), image.getHeight(), bytespp, bytespl);
    ETEXT_DESC monitor = new ETEXT_DESC();
    ITessAPI.TimeVal timeout = new ITessAPI.TimeVal();
    timeout.tv_sec = new NativeLong(0L); // time > 0 causes blank output
    monitor.end_time = timeout;
    ProgressMonitor pmo = new ProgressMonitor(monitor);
    pmo.start();
    TessAPI1.TessBaseAPIRecognize(handle, monitor);
    logger.info("Message: " + pmo.getMessage());
    TessResultIterator ri = TessAPI1.TessBaseAPIGetIterator(handle);
    TessPageIterator pi = TessAPI1.TessResultIteratorGetPageIterator(ri);
    TessAPI1.TessPageIteratorBegin(pi);
    logger.info("Bounding boxes:\nchar(s) left top right bottom confidence font-attributes");
    int level = TessPageIteratorLevel.RIL_WORD;
    do {
        Pointer ptr = TessAPI1.TessResultIteratorGetUTF8Text(ri, level);
        String word = ptr.getString(0);
        TessAPI1.TessDeleteText(ptr);
        float confidence = TessAPI1.TessResultIteratorConfidence(ri, level);
        // Fresh buffers each round: the relative get() below advances their position.
        IntBuffer leftB = IntBuffer.allocate(1);
        IntBuffer topB = IntBuffer.allocate(1);
        IntBuffer rightB = IntBuffer.allocate(1);
        IntBuffer bottomB = IntBuffer.allocate(1);
        TessAPI1.TessPageIteratorBoundingBox(pi, level, leftB, topB, rightB, bottomB);
        int left = leftB.get();
        int top = topB.get();
        int right = rightB.get();
        int bottom = bottomB.get();
        System.out.print(String.format("%s %d %d %d %d %f", word, left, top, right, bottom, confidence));
        // Original note: training box coordinates would be
        // (left, height - bottom, right, height - top) with height = image.getHeight().
        IntBuffer boldB = IntBuffer.allocate(1);
        IntBuffer italicB = IntBuffer.allocate(1);
        IntBuffer underlinedB = IntBuffer.allocate(1);
        IntBuffer monospaceB = IntBuffer.allocate(1);
        IntBuffer serifB = IntBuffer.allocate(1);
        IntBuffer smallcapsB = IntBuffer.allocate(1);
        IntBuffer pointSizeB = IntBuffer.allocate(1);
        IntBuffer fontIdB = IntBuffer.allocate(1);
        String fontName = TessAPI1.TessResultIteratorWordFontAttributes(ri, boldB, italicB, underlinedB,
                monospaceB, serifB, smallcapsB, pointSizeB, fontIdB);
        boolean bold = boldB.get() == TRUE;
        boolean italic = italicB.get() == TRUE;
        boolean underlined = underlinedB.get() == TRUE;
        boolean monospace = monospaceB.get() == TRUE;
        boolean serif = serifB.get() == TRUE;
        boolean smallcaps = smallcapsB.get() == TRUE;
        int pointSize = pointSizeB.get();
        int fontId = fontIdB.get();
        logger.info(String.format("  font: %s, size: %d, font id: %d, bold: %b,"
                + " italic: %b, underlined: %b, monospace: %b, serif: %b, smallcap: %b", fontName, pointSize,
                fontId, bold, italic, underlined, monospace, serif, smallcaps));
    } while (TessAPI1.TessPageIteratorNext(pi, level) == TRUE);
    assertTrue(true);
}
/**
 * Test of ChoiceIterator: recognizes a rectangle of {@code eurotext.tif} with
 * {@code save_blob_choices} enabled and, for every symbol, logs the symbol
 * and each alternative choice with its confidence. The test only verifies
 * that the walk completes.
 *
 * @throws Exception if the image cannot be read
 */
@Test
public void testChoiceIterator() throws Exception {
    logger.info("TessResultIteratorGetChoiceIterator");
    File tiff = new File(testResourcesDataPath, "eurotext.tif");
    // Read straight from the File: ImageIO.read(InputStream) leaves the stream
    // open, so the former FileInputStream was never closed.
    BufferedImage image = ImageIO.read(tiff); // require jai-imageio lib to read TIFF
    ByteBuffer buf = ImageIOHelper.convertImageData(image);
    int bpp = image.getColorModel().getPixelSize();
    int bytespp = bpp / 8;
    int bytespl = (int) Math.ceil(image.getWidth() * bpp / 8.0);
    TessAPI1.TessBaseAPIInit3(handle, datapath, language);
    TessAPI1.TessBaseAPISetImage(handle, buf, image.getWidth(), image.getHeight(), bytespp, bytespl);
    TessAPI1.TessBaseAPISetVariable(handle, "save_blob_choices", "T");
    TessAPI1.TessBaseAPISetRectangle(handle, 37, 228, 548, 31);
    ETEXT_DESC monitor = new ETEXT_DESC();
    ProgressMonitor pmo = new ProgressMonitor(monitor);
    pmo.start();
    TessAPI1.TessBaseAPIRecognize(handle, monitor);
    logger.info("Message: " + pmo.getMessage());
    TessResultIterator ri = TessAPI1.TessBaseAPIGetIterator(handle);
    int level = TessPageIteratorLevel.RIL_SYMBOL;
    if (ri != null) {
        do {
            Pointer symbol = TessAPI1.TessResultIteratorGetUTF8Text(ri, level);
            float conf = TessAPI1.TessResultIteratorConfidence(ri, level);
            if (symbol != null) {
                logger.info(String.format("symbol %s, conf: %f", symbol.getString(0), conf));
                boolean indent = false;
                TessChoiceIterator ci = TessAPI1.TessResultIteratorGetChoiceIterator(ri);
                do {
                    // Every choice after the first gets one extra tab.
                    if (indent) {
                        System.out.print("\t");
                    }
                    System.out.print("\t- ");
                    String choice = TessAPI1.TessChoiceIteratorGetUTF8Text(ci);
                    logger.info(String.format("%s conf: %f", choice, TessAPI1.TessChoiceIteratorConfidence(ci)));
                    indent = true;
                } while (TessAPI1.TessChoiceIteratorNext(ci) == ITessAPI.TRUE);
                TessAPI1.TessChoiceIteratorDelete(ci);
            }
            logger.info("---------------------------------------------");
            TessAPI1.TessDeleteText(symbol);
        } while (TessAPI1.TessResultIteratorNext(ri, level) == ITessAPI.TRUE);
    }
    assertTrue(true);
}
/**
 * Test of ResultRenderer method, of class TessAPI1: builds a chain of hOCR,
 * box-text, text and PDF renderers, processes {@code eurotext.tif} through
 * it, logs the details of every renderer in the chain and expects the PDF
 * output file to exist afterwards.
 *
 * @throws java.lang.Exception if any step of the test fails unexpectedly
 */
@Test
public void testResultRenderer() throws Exception {
    logger.info("TessResultRenderer");
    String image = String.format("%s/%s", testResourcesDataPath, "eurotext.tif");
    String output = "capi-test.txt";
    int set_only_init_params = ITessAPI.FALSE;
    int oem = TessOcrEngineMode.OEM_DEFAULT;
    PointerByReference configs = null;
    int configs_size = 0;
    String[] params = {"load_system_dawg", "tessedit_char_whitelist"};
    String[] vals = {"F", ""}; // alternative whitelist from the original note: 0123456789-.IThisalotfpnex
    PointerByReference vars_vec = new PointerByReference();
    vars_vec.setPointer(new StringArray(params));
    PointerByReference vars_values = new PointerByReference();
    vars_values.setPointer(new StringArray(vals));
    NativeSize vars_vec_size = new NativeSize(params.length);
    TessAPI1.TessBaseAPISetOutputName(handle, output);
    int rc = TessAPI1.TessBaseAPIInit4(handle, datapath, language,
            oem, configs, configs_size, vars_vec, vars_values, vars_vec_size, set_only_init_params);
    // Fail instead of returning quietly. The handle is deleted by tearDown(),
    // so it must not also be deleted here.
    assertEquals("Could not initialize tesseract.", 0, rc);
    String outputbase = "test/test-results/outputbase1";
    TessResultRenderer renderer = TessAPI1.TessHOcrRendererCreate(outputbase);
    TessAPI1.TessResultRendererInsert(renderer, TessAPI1.TessBoxTextRendererCreate(outputbase));
    TessAPI1.TessResultRendererInsert(renderer, TessAPI1.TessTextRendererCreate(outputbase));
    String dataPath = TessAPI1.TessBaseAPIGetDatapath(handle);
    TessAPI1.TessResultRendererInsert(renderer, TessAPI1.TessPDFRendererCreate(outputbase, dataPath));
    // The return value is not checked; a processing failure surfaces through the final assertion.
    TessAPI1.TessBaseAPIProcessPages(handle, image, null, 0, renderer);
    // Walk the chain with a separate cursor so 'renderer' still points at the
    // head when it is deleted below (the loop used to leave it null).
    for (TessResultRenderer current = renderer; current != null; current = TessAPI1.TessResultRendererNext(current)) {
        String ext = TessAPI1.TessResultRendererExtention(current).getString(0);
        logger.info(String.format("TessResultRendererExtention: %s\nTessResultRendererTitle: %s\nTessResultRendererImageNum: %d",
                ext,
                TessAPI1.TessResultRendererTitle(current).getString(0),
                TessAPI1.TessResultRendererImageNum(current)));
    }
    TessAPI1.TessDeleteResultRenderer(renderer);
    assertTrue(new File(outputbase + ".pdf").exists());
}
}

View File

@@ -0,0 +1,625 @@
/*
* Copyright @ 2017 Quan Nguyen
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package net.sourceforge.tess4j;
import com.ochafik.lang.jnaerator.runtime.NativeSize;
import com.sun.jna.Pointer;
import com.sun.jna.ptr.IntByReference;
import com.sun.jna.ptr.PointerByReference;
import java.nio.ByteBuffer;
import java.nio.DoubleBuffer;
import java.nio.FloatBuffer;
import java.nio.IntBuffer;
import net.sourceforge.lept4j.Boxa;
import net.sourceforge.lept4j.Pix;
public class TessAPIImpl implements TessAPI {
/**
 * Returns the shared {@code TessAPI.INSTANCE} rather than this object.
 *
 * @return {@code TessAPI.INSTANCE}
 */
public TessAPI getInstance() {
return TessAPI.INSTANCE;
}
/**
 * Empty method; calling it has no effect.
 */
public void TessAPIEndPage() {
}
/**
 * Empty method; calling it has no effect.
 */
public void TessAPIRelease() {
}
/**
 * Unimplemented override of {@code TessVersion}.
 *
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public String TessVersion() {
throw new UnsupportedOperationException("Not supported yet.");
}
/**
 * Unimplemented override of {@code TessDeleteText}; the argument is ignored.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public void TessDeleteText(Pointer text) {
throw new UnsupportedOperationException("Not supported yet.");
}
/**
 * Unimplemented override of {@code TessDeleteTextArray}; the argument is ignored.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public void TessDeleteTextArray(PointerByReference arr) {
throw new UnsupportedOperationException("Not supported yet.");
}
/**
 * Unimplemented override of {@code TessDeleteIntArray}; the argument is ignored.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public void TessDeleteIntArray(IntBuffer arr) {
throw new UnsupportedOperationException("Not supported yet.");
}
/**
 * Unimplemented override of {@code TessTextRendererCreate}; the argument is ignored.
 *
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public ITessAPI.TessResultRenderer TessTextRendererCreate(String outputbase) {
throw new UnsupportedOperationException("Not supported yet.");
}
/**
 * Unimplemented override of {@code TessHOcrRendererCreate}; the argument is ignored.
 *
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public ITessAPI.TessResultRenderer TessHOcrRendererCreate(String outputbase) {
throw new UnsupportedOperationException("Not supported yet.");
}
/**
 * Unimplemented override of {@code TessHOcrRendererCreate2}; the arguments are ignored.
 *
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public TessResultRenderer TessHOcrRendererCreate2(String outputbase, int font_info) {
throw new UnsupportedOperationException("Not supported yet.");
}
/**
 * Unimplemented override of {@code TessPDFRendererCreate}; the arguments are ignored.
 *
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public ITessAPI.TessResultRenderer TessPDFRendererCreate(String outputbase, String datadir) {
throw new UnsupportedOperationException("Not supported yet.");
}
/**
 * Unimplemented override of {@code TessPDFRendererCreateTextonly}; the arguments are ignored.
 *
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public TessResultRenderer TessPDFRendererCreateTextonly(String outputbase, String datadir, int textonly) {
throw new UnsupportedOperationException("Not supported yet.");
}
/**
 * Unimplemented override of {@code TessUnlvRendererCreate}; the argument is ignored.
 *
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public ITessAPI.TessResultRenderer TessUnlvRendererCreate(String outputbase) {
throw new UnsupportedOperationException("Not supported yet.");
}
/**
 * Unimplemented override of {@code TessBoxTextRendererCreate}; the argument is ignored.
 *
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public ITessAPI.TessResultRenderer TessBoxTextRendererCreate(String outputbase) {
throw new UnsupportedOperationException("Not supported yet.");
}
/**
 * Unimplemented override of {@code TessDeleteResultRenderer}; the argument is ignored.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public void TessDeleteResultRenderer(ITessAPI.TessResultRenderer renderer) {
throw new UnsupportedOperationException("Not supported yet.");
}
/**
 * Unimplemented override of {@code TessResultRendererInsert}; the arguments are ignored.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public void TessResultRendererInsert(ITessAPI.TessResultRenderer renderer, ITessAPI.TessResultRenderer next) {
throw new UnsupportedOperationException("Not supported yet.");
}
/**
 * Unimplemented override of {@code TessResultRendererNext}; the argument is ignored.
 *
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public ITessAPI.TessResultRenderer TessResultRendererNext(ITessAPI.TessResultRenderer renderer) {
throw new UnsupportedOperationException("Not supported yet.");
}
/**
 * Unimplemented override of {@code TessResultRendererBeginDocument}; the arguments are ignored.
 *
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public int TessResultRendererBeginDocument(ITessAPI.TessResultRenderer renderer, String title) {
throw new UnsupportedOperationException("Not supported yet.");
}
/**
 * Unimplemented override of {@code TessResultRendererAddImage}; the arguments are ignored.
 *
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public int TessResultRendererAddImage(ITessAPI.TessResultRenderer renderer, PointerByReference api) {
throw new UnsupportedOperationException("Not supported yet.");
}
/**
 * Unimplemented override of {@code TessResultRendererEndDocument}; the argument is ignored.
 *
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public int TessResultRendererEndDocument(ITessAPI.TessResultRenderer renderer) {
throw new UnsupportedOperationException("Not supported yet.");
}
/**
 * Unimplemented override of {@code TessResultRendererExtention}; the argument is ignored.
 *
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public Pointer TessResultRendererExtention(ITessAPI.TessResultRenderer renderer) {
throw new UnsupportedOperationException("Not supported yet.");
}
/**
 * Unimplemented override of {@code TessResultRendererTitle}; the argument is ignored.
 *
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public Pointer TessResultRendererTitle(ITessAPI.TessResultRenderer renderer) {
throw new UnsupportedOperationException("Not supported yet.");
}
/**
 * Unimplemented override of {@code TessResultRendererImageNum}; the argument is ignored.
 *
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public int TessResultRendererImageNum(ITessAPI.TessResultRenderer renderer) {
throw new UnsupportedOperationException("Not supported yet.");
}
/**
 * Unimplemented override of {@code TessBaseAPICreate}.
 *
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public ITessAPI.TessBaseAPI TessBaseAPICreate() {
throw new UnsupportedOperationException("Not supported yet.");
}
/**
 * Unimplemented override of {@code TessBaseAPIDelete}; the argument is ignored.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public void TessBaseAPIDelete(ITessAPI.TessBaseAPI handle) {
throw new UnsupportedOperationException("Not supported yet.");
}
/**
 * Unimplemented override of {@code TessBaseAPISetInputName}; the arguments are ignored.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public void TessBaseAPISetInputName(ITessAPI.TessBaseAPI handle, String name) {
throw new UnsupportedOperationException("Not supported yet.");
}
/**
 * Unimplemented override of {@code TessBaseAPIGetInputName}; the argument is ignored.
 *
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public String TessBaseAPIGetInputName(ITessAPI.TessBaseAPI handle) {
throw new UnsupportedOperationException("Not supported yet.");
}
/**
 * Unimplemented override of {@code TessBaseAPISetInputImage}; the arguments are ignored.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public void TessBaseAPISetInputImage(ITessAPI.TessBaseAPI handle, Pix pix) {
throw new UnsupportedOperationException("Not supported yet.");
}
/**
 * Unimplemented override of {@code TessBaseAPIGetInputImage}; the argument is ignored.
 *
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public Pix TessBaseAPIGetInputImage(ITessAPI.TessBaseAPI handle) {
throw new UnsupportedOperationException("Not supported yet.");
}
/**
 * Unimplemented override of {@code TessBaseAPIGetSourceYResolution}; the argument is ignored.
 *
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public int TessBaseAPIGetSourceYResolution(ITessAPI.TessBaseAPI handle) {
throw new UnsupportedOperationException("Not supported yet.");
}
/**
 * Unimplemented override of {@code TessBaseAPIGetDatapath}; the argument is ignored.
 *
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public String TessBaseAPIGetDatapath(ITessAPI.TessBaseAPI handle) {
throw new UnsupportedOperationException("Not supported yet.");
}
/**
 * Unimplemented override of {@code TessBaseAPISetOutputName}; the arguments are ignored.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public void TessBaseAPISetOutputName(ITessAPI.TessBaseAPI handle, String name) {
throw new UnsupportedOperationException("Not supported yet.");
}
/**
 * Unimplemented override of {@code TessBaseAPISetVariable}; the arguments are ignored.
 *
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public int TessBaseAPISetVariable(ITessAPI.TessBaseAPI handle, String name, String value) {
throw new UnsupportedOperationException("Not supported yet.");
}
/**
 * Unimplemented override of {@code TessBaseAPIGetIntVariable}; the arguments
 * are ignored and {@code value} is left untouched.
 *
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public int TessBaseAPIGetIntVariable(ITessAPI.TessBaseAPI handle, String name, IntBuffer value) {
throw new UnsupportedOperationException("Not supported yet.");
}
/**
 * Unimplemented override of {@code TessBaseAPIGetBoolVariable}; the arguments
 * are ignored and {@code value} is left untouched.
 *
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public int TessBaseAPIGetBoolVariable(ITessAPI.TessBaseAPI handle, String name, IntBuffer value) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public int TessBaseAPIGetDoubleVariable(ITessAPI.TessBaseAPI handle, String name, DoubleBuffer value) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public String TessBaseAPIGetStringVariable(ITessAPI.TessBaseAPI handle, String name) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public void TessBaseAPIPrintVariablesToFile(ITessAPI.TessBaseAPI handle, String filename) {
throw new UnsupportedOperationException("Not supported yet.");
}
// Unimplemented TessBaseAPI overrides for the Init* variants, language queries,
// config-file loading and page segmentation mode. Every method in this run
// throws UnsupportedOperationException("Not supported yet.") unconditionally.
@Override
public int TessBaseAPIInit1(ITessAPI.TessBaseAPI handle, String datapath, String language, int oem, PointerByReference configs, int configs_size) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public int TessBaseAPIInit2(ITessAPI.TessBaseAPI handle, String datapath, String language, int oem) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public int TessBaseAPIInit3(ITessAPI.TessBaseAPI handle, String datapath, String language) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public int TessBaseAPIInit4(ITessAPI.TessBaseAPI handle, String datapath, String language, int oem, PointerByReference configs, int configs_size, PointerByReference vars_vec, PointerByReference vars_values, NativeSize vars_vec_size, int set_only_non_debug_params) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public String TessBaseAPIGetInitLanguagesAsString(ITessAPI.TessBaseAPI handle) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public PointerByReference TessBaseAPIGetLoadedLanguagesAsVector(ITessAPI.TessBaseAPI handle) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public PointerByReference TessBaseAPIGetAvailableLanguagesAsVector(ITessAPI.TessBaseAPI handle) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public int TessBaseAPIInitLangMod(ITessAPI.TessBaseAPI handle, String datapath, String language) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public void TessBaseAPIInitForAnalysePage(ITessAPI.TessBaseAPI handle) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public void TessBaseAPIReadConfigFile(ITessAPI.TessBaseAPI handle, String filename, int init_only) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public void TessBaseAPISetPageSegMode(ITessAPI.TessBaseAPI handle, int mode) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public int TessBaseAPIGetPageSegMode(ITessAPI.TessBaseAPI handle) {
throw new UnsupportedOperationException("Not supported yet.");
}
// Unimplemented TessBaseAPI overrides for supplying image data, restricting the
// rectangle, and fetching thresholded images or component boxes. Every method in
// this run throws UnsupportedOperationException("Not supported yet.") unconditionally.
@Override
public Pointer TessBaseAPIRect(ITessAPI.TessBaseAPI handle, ByteBuffer imagedata, int bytes_per_pixel, int bytes_per_line, int left, int top, int width, int height) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public void TessBaseAPIClearAdaptiveClassifier(ITessAPI.TessBaseAPI handle) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public void TessBaseAPISetImage(ITessAPI.TessBaseAPI handle, ByteBuffer imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public void TessBaseAPISetImage2(ITessAPI.TessBaseAPI handle, Pix pix) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public void TessBaseAPISetSourceResolution(ITessAPI.TessBaseAPI handle, int ppi) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public void TessBaseAPISetRectangle(ITessAPI.TessBaseAPI handle, int left, int top, int width, int height) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public Pix TessBaseAPIGetThresholdedImage(ITessAPI.TessBaseAPI handle) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public Boxa TessBaseAPIGetRegions(ITessAPI.TessBaseAPI handle, PointerByReference pixa) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public Boxa TessBaseAPIGetTextlines(ITessAPI.TessBaseAPI handle, PointerByReference pixa, PointerByReference blockids) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public Boxa TessBaseAPIGetTextlines1(ITessAPI.TessBaseAPI handle, int raw_image, int raw_padding, PointerByReference pixa, PointerByReference blockids, PointerByReference paraids) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public Boxa TessBaseAPIGetStrips(ITessAPI.TessBaseAPI handle, PointerByReference pixa, PointerByReference blockids) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public Boxa TessBaseAPIGetWords(ITessAPI.TessBaseAPI handle, PointerByReference pixa) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public Boxa TessBaseAPIGetConnectedComponents(ITessAPI.TessBaseAPI handle, PointerByReference cc) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public Boxa TessBaseAPIGetComponentImages(ITessAPI.TessBaseAPI handle, int level, int text_only, PointerByReference pixa, PointerByReference blockids) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public Boxa TessBaseAPIGetComponentImages1(ITessAPI.TessBaseAPI handle, int level, int text_only, int raw_image, int raw_padding, PointerByReference pixa, PointerByReference blockids, PointerByReference paraids) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public int TessBaseAPIGetThresholdedImageScaleFactor(ITessAPI.TessBaseAPI handle) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public void TessBaseAPIDumpPGM(ITessAPI.TessBaseAPI handle, String filename) {
throw new UnsupportedOperationException("Not supported yet.");
}
// Unimplemented TessBaseAPI overrides for layout analysis, recognition, iterator
// retrieval, page processing, text output and miscellaneous queries. Every method
// in this run throws UnsupportedOperationException("Not supported yet.") unconditionally.
@Override
public ITessAPI.TessPageIterator TessBaseAPIAnalyseLayout(ITessAPI.TessBaseAPI handle) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public int TessBaseAPIRecognize(ITessAPI.TessBaseAPI handle, ITessAPI.ETEXT_DESC monitor) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public int TessBaseAPIRecognizeForChopTest(ITessAPI.TessBaseAPI handle, ITessAPI.ETEXT_DESC monitor) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public ITessAPI.TessResultIterator TessBaseAPIGetIterator(ITessAPI.TessBaseAPI handle) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public ITessAPI.TessMutableIterator TessBaseAPIGetMutableIterator(ITessAPI.TessBaseAPI handle) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public int TessBaseAPIProcessPages(ITessAPI.TessBaseAPI handle, String filename, String retry_config, int timeout_millisec, ITessAPI.TessResultRenderer renderer) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public int TessBaseAPIProcessPage(ITessAPI.TessBaseAPI handle, Pix pix, int page_index, String filename, String retry_config, int timeout_millisec, ITessAPI.TessResultRenderer renderer) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public Pointer TessBaseAPIGetUTF8Text(ITessAPI.TessBaseAPI handle) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public Pointer TessBaseAPIGetHOCRText(ITessAPI.TessBaseAPI handle, int page_number) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public Pointer TessBaseAPIGetBoxText(ITessAPI.TessBaseAPI handle, int page_number) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public Pointer TessBaseAPIGetUNLVText(ITessAPI.TessBaseAPI handle) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public int TessBaseAPIMeanTextConf(ITessAPI.TessBaseAPI handle) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public IntByReference TessBaseAPIAllWordConfidences(ITessAPI.TessBaseAPI handle) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public int TessBaseAPIAdaptToWordStr(ITessAPI.TessBaseAPI handle, int mode, String wordstr) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public void TessBaseAPIClear(ITessAPI.TessBaseAPI handle) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public void TessBaseAPIEnd(ITessAPI.TessBaseAPI handle) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public int TessBaseAPIIsValidWord(ITessAPI.TessBaseAPI handle, String word) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public int TessBaseAPIGetTextDirection(ITessAPI.TessBaseAPI handle, IntBuffer out_offset, FloatBuffer out_slope) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public void TessBaseAPIClearPersistentCache(ITessAPI.TessBaseAPI handle) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public int TessBaseAPIDetectOrientationScript(TessBaseAPI handle, IntBuffer orient_deg, FloatBuffer orient_conf, PointerByReference script_name, FloatBuffer script_conf) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public String TessBaseAPIGetUnichar(ITessAPI.TessBaseAPI handle, int unichar_id) {
throw new UnsupportedOperationException("Not supported yet.");
}
// Unimplemented TessPageIterator overrides (delete/copy, navigation, bounding
// boxes, images, baseline, orientation and paragraph info). Every method in this
// run throws UnsupportedOperationException("Not supported yet.") unconditionally.
@Override
public void TessPageIteratorDelete(ITessAPI.TessPageIterator handle) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public ITessAPI.TessPageIterator TessPageIteratorCopy(ITessAPI.TessPageIterator handle) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public void TessPageIteratorBegin(ITessAPI.TessPageIterator handle) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public int TessPageIteratorNext(ITessAPI.TessPageIterator handle, int level) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public int TessPageIteratorIsAtBeginningOf(ITessAPI.TessPageIterator handle, int level) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public int TessPageIteratorIsAtFinalElement(ITessAPI.TessPageIterator handle, int level, int element) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public int TessPageIteratorBoundingBox(ITessAPI.TessPageIterator handle, int level, IntBuffer left, IntBuffer top, IntBuffer right, IntBuffer bottom) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public int TessPageIteratorBlockType(ITessAPI.TessPageIterator handle) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public Pix TessPageIteratorGetBinaryImage(ITessAPI.TessPageIterator handle, int level) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public Pix TessPageIteratorGetImage(ITessAPI.TessPageIterator handle, int level, int padding, Pix original_image, IntBuffer left, IntBuffer top) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public int TessPageIteratorBaseline(ITessAPI.TessPageIterator handle, int level, IntBuffer x1, IntBuffer y1, IntBuffer x2, IntBuffer y2) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public void TessPageIteratorOrientation(ITessAPI.TessPageIterator handle, IntBuffer orientation, IntBuffer writing_direction, IntBuffer textline_order, FloatBuffer deskew_angle) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public void TessPageIteratorParagraphInfo(ITessAPI.TessPageIterator handle, IntBuffer justification, IntBuffer is_list_item, IntBuffer is_crown, IntBuffer first_line_indent) {
throw new UnsupportedOperationException("Not supported yet.");
}
// Unimplemented TessResultIterator overrides (delete/copy, page-iterator access,
// navigation, text, confidence, font attributes and word/symbol flags). Every
// method in this run throws UnsupportedOperationException("Not supported yet.")
// unconditionally.
@Override
public void TessResultIteratorDelete(ITessAPI.TessResultIterator handle) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public ITessAPI.TessResultIterator TessResultIteratorCopy(ITessAPI.TessResultIterator handle) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public ITessAPI.TessPageIterator TessResultIteratorGetPageIterator(ITessAPI.TessResultIterator handle) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public ITessAPI.TessPageIterator TessResultIteratorGetPageIteratorConst(ITessAPI.TessResultIterator handle) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public int TessResultIteratorNext(ITessAPI.TessResultIterator handle, int level) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public Pointer TessResultIteratorGetUTF8Text(ITessAPI.TessResultIterator handle, int level) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public float TessResultIteratorConfidence(ITessAPI.TessResultIterator handle, int level) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public String TessResultIteratorWordRecognitionLanguage(ITessAPI.TessResultIterator handle) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public String TessResultIteratorWordFontAttributes(ITessAPI.TessResultIterator handle, IntBuffer is_bold, IntBuffer is_italic, IntBuffer is_underlined, IntBuffer is_monospace, IntBuffer is_serif, IntBuffer is_smallcaps, IntBuffer pointsize, IntBuffer font_id) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public int TessResultIteratorWordIsFromDictionary(ITessAPI.TessResultIterator handle) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public int TessResultIteratorWordIsNumeric(ITessAPI.TessResultIterator handle) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public int TessResultIteratorSymbolIsSuperscript(ITessAPI.TessResultIterator handle) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public int TessResultIteratorSymbolIsSubscript(ITessAPI.TessResultIterator handle) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public int TessResultIteratorSymbolIsDropcap(ITessAPI.TessResultIterator handle) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public ITessAPI.TessChoiceIterator TessResultIteratorGetChoiceIterator(ITessAPI.TessResultIterator handle) {
throw new UnsupportedOperationException("Not supported yet.");
}
// Unimplemented TessChoiceIterator overrides (delete, next, text, confidence).
// Every method in this run throws UnsupportedOperationException("Not supported yet.")
// unconditionally.
@Override
public void TessChoiceIteratorDelete(ITessAPI.TessChoiceIterator handle) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public int TessChoiceIteratorNext(ITessAPI.TessChoiceIterator handle) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public String TessChoiceIteratorGetUTF8Text(ITessAPI.TessChoiceIterator handle) {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public float TessChoiceIteratorConfidence(ITessAPI.TessChoiceIterator handle) {
throw new UnsupportedOperationException("Not supported yet.");
}
}

View File

@@ -0,0 +1,648 @@
/**
* Copyright @ 2012 Quan Nguyen
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package net.sourceforge.tess4j;
import static org.junit.Assert.assertArrayEquals;
import java.awt.image.BufferedImage;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.nio.ByteBuffer;
import java.nio.FloatBuffer;
import java.nio.IntBuffer;
import java.util.Arrays;
import javax.imageio.ImageIO;
import net.sourceforge.tess4j.util.ImageIOHelper;
import net.sourceforge.tess4j.util.LoggHelper;
import net.sourceforge.tess4j.util.Utils;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import com.ochafik.lang.jnaerator.runtime.NativeSize;
import com.sun.jna.NativeLong;
import com.sun.jna.Pointer;
import com.sun.jna.StringArray;
import com.sun.jna.ptr.PointerByReference;
import net.sourceforge.lept4j.Box;
import net.sourceforge.lept4j.Boxa;
import static net.sourceforge.lept4j.ILeptonica.L_CLONE;
import net.sourceforge.lept4j.Leptonica;
import net.sourceforge.lept4j.Pix;
import net.sourceforge.lept4j.util.LeptUtils;
import net.sourceforge.tess4j.ITessAPI.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static net.sourceforge.tess4j.ITessAPI.FALSE;
import static net.sourceforge.tess4j.ITessAPI.TRUE;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
public class TessAPITest {
// Logger whose name is the string form of a LoggHelper instance.
private static final Logger logger = LoggerFactory.getLogger(new LoggHelper().toString());
// Value passed as the datapath argument of the TessBaseAPIInit* calls in the tests.
private final String datapath = ".";
// Directory from which the tests load the eurotext sample images.
private final String testResourcesDataPath = "test/resources/test-data";
// Value passed as the language argument of the TessBaseAPIInit* calls in the tests.
String language = "eng";
// Prefix the OCR tests expect the recognized text to start with.
String expOCRResult = "The (quick) [brown] {fox} jumps!\nOver the $43,456.78 <lazy> #90 dog";
// Assigned in setUp(); the handle is deleted again in tearDown().
TessAPI api;
TessBaseAPI handle;
// Class-level setup hook; the body is empty, so nothing is initialised here.
@BeforeClass
public static void setUpClass() throws Exception {
}
// Class-level teardown hook; the body is empty, so nothing is released here.
@AfterClass
public static void tearDownClass() throws Exception {
}
/**
 * Obtains the API instance from a new TessAPIImpl and creates the engine
 * handle used by the test that is about to run.
 */
@Before
public void setUp() {
    final TessAPIImpl provider = new TessAPIImpl();
    this.api = provider.getInstance();
    this.handle = this.api.TessBaseAPICreate();
}
// Deletes the engine handle that setUp() created for the test that just ran.
@After
public void tearDown() {
api.TessBaseAPIDelete(handle);
}
/**
 * Checks that TessBaseAPIRect, applied to a fixed rectangle of the
 * eurotext.tif sample, yields text starting with the expected OCR result.
 *
 * @throws java.lang.Exception if the sample image cannot be read
 */
@Test
public void testTessBaseAPIRect() throws Exception {
    logger.info("TessBaseAPIRect");

    final File sample = new File(testResourcesDataPath, "eurotext.tif");
    final BufferedImage img = ImageIO.read(sample); // require jai-imageio lib to read TIFF
    final ByteBuffer pixels = ImageIOHelper.convertImageData(img);
    final int bitsPerPixel = img.getColorModel().getPixelSize();
    final int bytesPerPixel = bitsPerPixel / 8;
    final int bytesPerLine = (int) Math.ceil(img.getWidth() * bitsPerPixel / 8.0);

    api.TessBaseAPIInit3(handle, datapath, language);
    api.TessBaseAPISetPageSegMode(handle, TessPageSegMode.PSM_AUTO);

    // Read the text out of the returned pointer before passing it to TessDeleteText.
    final Pointer nativeText = api.TessBaseAPIRect(handle, pixels, bytesPerPixel, bytesPerLine, 90, 50, 862, 614);
    final String recognized = nativeText.getString(0);
    api.TessDeleteText(nativeText);

    logger.info(recognized);
    assertTrue(recognized.startsWith(expOCRResult));
}
/**
 * Test of TessBaseAPIGetUTF8Text method, of class TessAPI, using raw image
 * bytes restricted to a fixed rectangle.
 *
 * @throws java.lang.Exception if the sample image cannot be read
 */
@Test
public void testTessBaseAPIGetUTF8Text() throws Exception {
    logger.info("TessBaseAPIGetUTF8Text");
    String expResult = expOCRResult;
    File tiff = new File(testResourcesDataPath, "eurotext.tif");
    // ImageIO.read(InputStream) does not close the stream it is given, so read
    // from the File directly instead of leaking a FileInputStream.
    BufferedImage image = ImageIO.read(tiff); // require jai-imageio lib to read TIFF
    ByteBuffer buf = ImageIOHelper.convertImageData(image);
    int bpp = image.getColorModel().getPixelSize();
    int bytespp = bpp / 8;
    int bytespl = (int) Math.ceil(image.getWidth() * bpp / 8.0);
    api.TessBaseAPIInit3(handle, datapath, language);
    api.TessBaseAPISetPageSegMode(handle, TessPageSegMode.PSM_AUTO);
    api.TessBaseAPISetImage(handle, buf, image.getWidth(), image.getHeight(), bytespp, bytespl);
    api.TessBaseAPISetRectangle(handle, 90, 50, 862, 614);
    // Read the text out of the returned pointer before passing it to TessDeleteText.
    Pointer utf8Text = api.TessBaseAPIGetUTF8Text(handle);
    String result = utf8Text.getString(0);
    api.TessDeleteText(utf8Text);
    logger.info(result);
    assertTrue(result.startsWith(expResult));
}
/**
 * Test of TessBaseAPIGetUTF8Text method, of class TessAPI, with the image
 * supplied as a Leptonica Pix.
 *
 * @throws java.lang.Exception
 */
@Test
public void testTessBaseAPIGetUTF8Text_Pix() throws Exception {
    logger.info("TessBaseAPIGetUTF8Text_Pix");
    String expResult = expOCRResult;
    File tiff = new File(testResourcesDataPath, "eurotext.tif");
    Leptonica leptInstance = Leptonica.INSTANCE;
    Pix pix = leptInstance.pixRead(tiff.getPath());
    String result;
    try {
        api.TessBaseAPIInit3(handle, datapath, language);
        api.TessBaseAPISetImage2(handle, pix);
        Pointer utf8Text = api.TessBaseAPIGetUTF8Text(handle);
        result = utf8Text.getString(0);
        api.TessDeleteText(utf8Text);
        logger.info(result);
    } finally {
        // Release the Pix even when OCR throws, using the same helper as the
        // component-images test.
        LeptUtils.dispose(pix);
    }
    assertTrue(result.startsWith(expResult));
}
/**
 * Test of TessBaseAPIGetComponentImages method, of class TessAPI: fetches the
 * text-line boxes of the sample image, OCRs each box, and checks the box count.
 *
 * @throws java.lang.Exception
 */
@Test
public void testTessBaseAPIGetComponentImages() throws Exception {
    logger.info("TessBaseAPIGetComponentImages");
    File image = new File(testResourcesDataPath, "eurotext.png");
    int expResult = 12; // number of lines in the test image
    Leptonica leptInstance = Leptonica.INSTANCE;
    Pix pix = leptInstance.pixRead(image.getPath());
    Boxa boxes = null;
    int boxCount;
    try {
        api.TessBaseAPIInit3(handle, datapath, language);
        api.TessBaseAPISetImage2(handle, pix);
        PointerByReference pixa = null;
        PointerByReference blockids = null;
        boxes = api.TessBaseAPIGetComponentImages(handle, TessPageIteratorLevel.RIL_TEXTLINE, TRUE, pixa, blockids);
        // boxes = api.TessBaseAPIGetRegions(handle, pixa); // equivalent to TessPageIteratorLevel.RIL_BLOCK
        boxCount = leptInstance.boxaGetCount(boxes);
        for (int i = 0; i < boxCount; i++) {
            Box box = leptInstance.boxaGetBox(boxes, i, L_CLONE);
            if (box == null) {
                continue;
            }
            try {
                api.TessBaseAPISetRectangle(handle, box.x, box.y, box.w, box.h);
                Pointer utf8Text = api.TessBaseAPIGetUTF8Text(handle);
                String ocrResult = utf8Text.getString(0);
                api.TessDeleteText(utf8Text);
                int conf = api.TessBaseAPIMeanTextConf(handle);
                System.out.print(String.format("Box[%d]: x=%d, y=%d, w=%d, h=%d, confidence: %d, text: %s", i, box.x, box.y, box.w, box.h, conf, ocrResult));
            } finally {
                // Each cloned box is released even if OCR of that box fails.
                LeptUtils.dispose(box);
            }
        }
    } finally {
        // release Pix and Boxa resources, also when a call above throws
        LeptUtils.dispose(pix);
        if (boxes != null) {
            LeptUtils.dispose(boxes);
        }
    }
    assertEquals(expResult, boxCount);
}
/**
 * Checks that the version string reported by TessVersion starts with the
 * expected release number.
 */
@Test
public void testTessVersion() {
    logger.info("TessVersion");
    final String reported = api.TessVersion();
    logger.info(reported);
    assertTrue(reported.startsWith("3.05.01"));
}
/**
 * Sets a boolean variable to "1" and checks that TessBaseAPIGetBoolVariable
 * reports success and reads the value back as 1.
 */
@Test
public void testTessBaseAPIGetBoolVariable() {
    logger.info("TessBaseAPIGetBoolVariable");
    final String variable = "tessedit_create_hocr";
    api.TessBaseAPISetVariable(handle, variable, "1");

    final IntBuffer out = IntBuffer.allocate(1);
    // -1 marks "lookup failed" so it can never be mistaken for a boolean value.
    final boolean found = api.TessBaseAPIGetBoolVariable(handle, variable, out) == TRUE;
    final int actual = found ? out.get(0) : -1;

    assertEquals(1, actual);
}
/**
 * Test of TessBaseAPIPrintVariablesToFile method, of class TessAPI: sets a
 * variable, dumps all variables to a file, and checks the dump contains the
 * name/value pair separated by a tab.
 *
 * @throws java.lang.Exception if the dump file cannot be read
 */
@Test
public void testTessBaseAPIPrintVariablesToFile() throws Exception {
    logger.info("TessBaseAPIPrintVariablesToFile");
    String varName = "tessedit_char_whitelist";
    String value = "0123456789";
    api.TessBaseAPISetVariable(handle, varName, value);
    String filename = "printvar.txt";
    api.TessBaseAPIPrintVariablesToFile(handle, filename); // will crash if not invoked after some method
    File file = new File(filename);
    StringBuilder strB = new StringBuilder();
    String EOL = System.getProperty("line.separator");
    try {
        BufferedReader input = new BufferedReader(new FileReader(file));
        try {
            String line;
            while ((line = input.readLine()) != null) {
                strB.append(line).append(EOL);
            }
        } finally {
            // Close the reader even if reading fails part-way.
            input.close();
        }
    } finally {
        // Always remove the dump so a failed run does not leave it behind.
        file.delete();
    }
    assertTrue(strB.toString().contains(varName + "\t" + value));
}
/**
 * Initialises the engine through TessBaseAPIInit4 with two variables set to
 * "F" and expects a return code of 0.
 */
@Test
public void testTessBaseAPIInit4() {
    logger.info("TessBaseAPIInit4");

    // disable loading dictionaries
    final String[] names = {"load_system_dawg", "load_freq_dawg"};
    final StringArray nativeNames = new StringArray(names);
    final PointerByReference vars_vec = new PointerByReference();
    vars_vec.setPointer(nativeNames);

    final String[] values = {"F", "F"};
    final StringArray nativeValues = new StringArray(values);
    final PointerByReference vars_values = new PointerByReference();
    vars_values.setPointer(nativeValues);

    final NativeSize vars_vec_size = new NativeSize(values.length);
    final PointerByReference noConfigs = null; //new PointerByReference();

    final int status = api.TessBaseAPIInit4(handle, datapath, language, TessOcrEngineMode.OEM_DEFAULT,
            noConfigs, 0, vars_vec, vars_values, vars_vec_size, FALSE);
    assertEquals(0, status);
}
/**
 * Checks that TessBaseAPIGetInitLanguagesAsString returns an empty string for
 * a handle on which no Init call has been made in this test.
 */
@Test
public void testTessBaseAPIGetInitLanguagesAsString() {
    logger.info("TessBaseAPIGetInitLanguagesAsString");
    final String initLanguages = api.TessBaseAPIGetInitLanguagesAsString(handle);
    assertEquals("", initLanguages);
}
/**
 * Initialises the engine with the test language and checks that
 * TessBaseAPIGetLoadedLanguagesAsVector reports exactly that language.
 */
@Test
public void testTessBaseAPIGetLoadedLanguagesAsVector() {
    logger.info("TessBaseAPIGetLoadedLanguagesAsVector");
    api.TessBaseAPIInit3(handle, datapath, language);

    final PointerByReference vector = api.TessBaseAPIGetLoadedLanguagesAsVector(handle);
    final String[] loaded = vector.getPointer().getStringArray(0);

    assertArrayEquals(new String[]{"eng"}, loaded);
}
/**
 * Initialises the engine and checks that every expected language appears in
 * the list returned by TessBaseAPIGetAvailableLanguagesAsVector.
 */
@Test
public void testTessBaseAPIGetAvailableLanguagesAsVector() {
    logger.info("TessBaseAPIGetAvailableLanguagesAsVector");
    api.TessBaseAPIInit3(handle, datapath, language);

    final String[] required = {"eng"};
    final PointerByReference vector = api.TessBaseAPIGetAvailableLanguagesAsVector(handle);
    final String[] available = vector.getPointer().getStringArray(0);

    // The available list may contain more languages; only the required ones matter.
    boolean allPresent = true;
    for (String lang : required) {
        if (!Arrays.asList(available).contains(lang)) {
            allPresent = false;
            break;
        }
    }
    assertTrue(allPresent);
}
/**
 * Test of TessBaseAPIGetHOCRText method, of class TessAPI: the hOCR output for
 * the sample image must contain an ocr_page div.
 *
 * @throws java.lang.Exception if the sample image cannot be read
 */
@Test
public void testTessBaseAPIGetHOCRText() throws Exception {
    logger.info("TessBaseAPIGetHOCRText");
    File tiff = new File(testResourcesDataPath, "eurotext.tif");
    // ImageIO.read(InputStream) does not close the stream it is given, so read
    // from the File directly instead of leaking a FileInputStream.
    BufferedImage image = ImageIO.read(tiff); // require jai-imageio lib to read TIFF
    ByteBuffer buf = ImageIOHelper.convertImageData(image);
    int bpp = image.getColorModel().getPixelSize();
    int bytespp = bpp / 8;
    int bytespl = (int) Math.ceil(image.getWidth() * bpp / 8.0);
    api.TessBaseAPISetPageSegMode(handle, TessPageSegMode.PSM_AUTO);
    api.TessBaseAPIInit3(handle, datapath, language);
    api.TessBaseAPISetImage(handle, buf, image.getWidth(), image.getHeight(), bytespp, bytespl);
    int page_number = 0;
    // Read the text out of the returned pointer before passing it to TessDeleteText.
    Pointer utf8Text = api.TessBaseAPIGetHOCRText(handle, page_number);
    String result = utf8Text.getString(0);
    api.TessDeleteText(utf8Text);
    assertTrue(result.contains("<div class='ocr_page'"));
}
/**
 * Test of TessBaseAPIAnalyseLayout method, of class TessAPI: walks the page
 * iterator at text-line level and checks the number of lines visited.
 *
 * @throws java.lang.Exception
 */
@Test
public void testTessBaseAPIAnalyseLayout() throws Exception {
    logger.info("TessBaseAPIAnalyseLayout");
    File image = new File(testResourcesDataPath, "eurotext.png");
    int expResult = 12; // number of lines in the test image
    Leptonica leptInstance = Leptonica.INSTANCE;
    Pix pix = leptInstance.pixRead(image.getPath());
    int i = 0;
    try {
        api.TessBaseAPIInit3(handle, datapath, language);
        api.TessBaseAPISetImage2(handle, pix);
        int pageIteratorLevel = TessPageIteratorLevel.RIL_TEXTLINE;
        logger.info("PageIteratorLevel: " + Utils.getConstantName(pageIteratorLevel, TessPageIteratorLevel.class));
        TessPageIterator pi = api.TessBaseAPIAnalyseLayout(handle);
        try {
            do {
                // Fresh one-element buffers per line: the relative get() below
                // advances each buffer's position.
                IntBuffer leftB = IntBuffer.allocate(1);
                IntBuffer topB = IntBuffer.allocate(1);
                IntBuffer rightB = IntBuffer.allocate(1);
                IntBuffer bottomB = IntBuffer.allocate(1);
                api.TessPageIteratorBoundingBox(pi, pageIteratorLevel, leftB, topB, rightB, bottomB);
                int left = leftB.get();
                int top = topB.get();
                int right = rightB.get();
                int bottom = bottomB.get();
                logger.info(String.format("Box[%d]: x=%d, y=%d, w=%d, h=%d", i++, left, top, right - left, bottom - top));
            } while (api.TessPageIteratorNext(pi, pageIteratorLevel) == TRUE);
        } finally {
            api.TessPageIteratorDelete(pi);
        }
    } finally {
        // The Pix read above was never released; free it on every path.
        LeptUtils.dispose(pix);
    }
    assertEquals(expResult, i);
}
/**
 * Runs TessBaseAPIDetectOrientationScript on the sample image, logs the
 * detected orientation and script when the call succeeds, and expects the
 * call to return TRUE.
 *
 * @throws java.lang.Exception
 */
@Test
public void testTessBaseAPIDetectOrientationScript() throws Exception {
    logger.info("TessBaseAPIDetectOrientationScript");
    final File sample = new File(testResourcesDataPath, "eurotext.png");
    final Leptonica lept = Leptonica.INSTANCE;
    final Pix pix = lept.pixRead(sample.getPath());

    api.TessBaseAPIInit3(handle, datapath, language);
    api.TessBaseAPISetImage2(handle, pix);

    // One-element output buffers filled in by the native call.
    final IntBuffer degreesOut = IntBuffer.allocate(1);
    final FloatBuffer degreesConfOut = FloatBuffer.allocate(1);
    final PointerByReference scriptNameOut = new PointerByReference();
    final FloatBuffer scriptConfOut = FloatBuffer.allocate(1);

    final int status = api.TessBaseAPIDetectOrientationScript(handle, degreesOut, degreesConfOut, scriptNameOut, scriptConfOut);
    if (status == TRUE) {
        final int degrees = degreesOut.get();
        final float degreesConf = degreesConfOut.get();
        final String scriptName = scriptNameOut.getValue().getString(0);
        final float scriptConf = scriptConfOut.get();
        final String summary = String.format("OrientationScript: orient_deg=%d, orient_conf=%f, script_name=%s, script_conf=%f", degrees, degreesConf, scriptName, scriptConf);
        logger.info(summary);
    }

    // Release the Pix through a pointer-to-pointer, as pixDestroy expects.
    final PointerByReference pixRef = new PointerByReference();
    pixRef.setValue(pix.getPointer());
    lept.pixDestroy(pixRef);

    assertEquals(TRUE, status);
}
/**
 * Test of Orientation and script detection (OSD): sets PSM_AUTO_OSD, checks it
 * is read back unchanged, and logs the page orientation when recognition
 * succeeds.
 *
 * @throws java.lang.Exception if the sample image cannot be read
 */
@Test
public void testOSD() throws Exception {
    logger.info("OSD");
    int expResult = TessPageSegMode.PSM_AUTO_OSD;
    IntBuffer orientation = IntBuffer.allocate(1);
    IntBuffer direction = IntBuffer.allocate(1);
    IntBuffer order = IntBuffer.allocate(1);
    FloatBuffer deskew_angle = FloatBuffer.allocate(1);
    File tiff = new File(testResourcesDataPath, "eurotext.tif");
    // ImageIO.read(InputStream) does not close the stream it is given, so read
    // from the File directly instead of leaking a FileInputStream.
    BufferedImage image = ImageIO.read(tiff); // require jai-imageio lib to read TIFF
    ByteBuffer buf = ImageIOHelper.convertImageData(image);
    int bpp = image.getColorModel().getPixelSize();
    int bytespp = bpp / 8;
    int bytespl = (int) Math.ceil(image.getWidth() * bpp / 8.0);
    api.TessBaseAPIInit3(handle, datapath, language);
    api.TessBaseAPISetPageSegMode(handle, TessPageSegMode.PSM_AUTO_OSD);
    int actualResult = api.TessBaseAPIGetPageSegMode(handle);
    logger.info("PSM: " + Utils.getConstantName(actualResult, TessPageSegMode.class));
    api.TessBaseAPISetImage(handle, buf, image.getWidth(), image.getHeight(), bytespp, bytespl);
    int success = api.TessBaseAPIRecognize(handle, null);
    if (success == 0) {
        TessPageIterator pi = api.TessBaseAPIAnalyseLayout(handle);
        try {
            api.TessPageIteratorOrientation(pi, orientation, direction, order, deskew_angle);
            logger.info(String.format(
                    "Orientation: %s\nWritingDirection: %s\nTextlineOrder: %s\nDeskew angle: %.4f\n",
                    Utils.getConstantName(orientation.get(), TessOrientation.class),
                    Utils.getConstantName(direction.get(), TessWritingDirection.class),
                    Utils.getConstantName(order.get(), TessTextlineOrder.class),
                    deskew_angle.get()));
        } finally {
            // Delete the page iterator, as the layout-analysis test does.
            api.TessPageIteratorDelete(pi);
        }
    }
    assertEquals(expResult, actualResult);
}
/**
 * Test of ResultIterator and PageIterator.
 * <p>
 * Runs recognition on <code>eurotext.tif</code> with a progress monitor attached, then
 * walks the result at word level and reports each word's text, bounding box, confidence
 * and font attributes.
 *
 * @throws Exception if reading the test image fails
 */
@Test
public void testResultIterator() throws Exception {
    logger.info("TessBaseAPIGetIterator");
    File tiff = new File(testResourcesDataPath, "eurotext.tif");
    // Read directly from the File: ImageIO.read(InputStream) does not close the stream it
    // is handed, so wrapping the file in a FileInputStream leaked a file handle.
    BufferedImage image = ImageIO.read(tiff); // require jai-imageio lib to read TIFF
    ByteBuffer buf = ImageIOHelper.convertImageData(image);
    int bpp = image.getColorModel().getPixelSize();
    int bytespp = bpp / 8;
    int bytespl = (int) Math.ceil(image.getWidth() * bpp / 8.0);
    api.TessBaseAPIInit3(handle, datapath, language);
    api.TessBaseAPISetPageSegMode(handle, TessPageSegMode.PSM_AUTO);
    api.TessBaseAPISetImage(handle, buf, image.getWidth(), image.getHeight(), bytespp, bytespl);

    ETEXT_DESC monitor = new ETEXT_DESC();
    TimeVal timeout = new TimeVal();
    timeout.tv_sec = new NativeLong(0L); // time > 0 causes blank output
    monitor.end_time = timeout;
    ProgressMonitor pmo = new ProgressMonitor(monitor);
    pmo.start();
    api.TessBaseAPIRecognize(handle, monitor);
    logger.info("Message: " + pmo.getMessage());

    TessResultIterator ri = api.TessBaseAPIGetIterator(handle);
    // Fail with a readable message rather than handing a null iterator to native code.
    assertTrue("TessBaseAPIGetIterator returned null", ri != null);
    TessPageIterator pi = api.TessResultIteratorGetPageIterator(ri);
    api.TessPageIteratorBegin(pi);
    logger.info("Bounding boxes:\nchar(s) left top right bottom confidence font-attributes");
    int level = TessPageIteratorLevel.RIL_WORD;

    do {
        // The text pointer is native memory: copy it into a Java String, then release it.
        // A null pointer is tolerated and reported as a null word.
        Pointer ptr = api.TessResultIteratorGetUTF8Text(ri, level);
        String word = null;
        if (ptr != null) {
            word = ptr.getString(0);
            api.TessDeleteText(ptr);
        }
        float confidence = api.TessResultIteratorConfidence(ri, level);

        // One-element buffers act as out-parameters for the native call.
        IntBuffer leftB = IntBuffer.allocate(1);
        IntBuffer topB = IntBuffer.allocate(1);
        IntBuffer rightB = IntBuffer.allocate(1);
        IntBuffer bottomB = IntBuffer.allocate(1);
        api.TessPageIteratorBoundingBox(pi, level, leftB, topB, rightB, bottomB);
        int left = leftB.get();
        int top = topB.get();
        int right = rightB.get();
        int bottom = bottomB.get();
        System.out.print(String.format("%s %d %d %d %d %f", word, left, top, right, bottom, confidence));

        IntBuffer boldB = IntBuffer.allocate(1);
        IntBuffer italicB = IntBuffer.allocate(1);
        IntBuffer underlinedB = IntBuffer.allocate(1);
        IntBuffer monospaceB = IntBuffer.allocate(1);
        IntBuffer serifB = IntBuffer.allocate(1);
        IntBuffer smallcapsB = IntBuffer.allocate(1);
        IntBuffer pointSizeB = IntBuffer.allocate(1);
        IntBuffer fontIdB = IntBuffer.allocate(1);
        String fontName = api.TessResultIteratorWordFontAttributes(ri, boldB, italicB, underlinedB, monospaceB,
                serifB, smallcapsB, pointSizeB, fontIdB);
        boolean bold = boldB.get() == TRUE;
        boolean italic = italicB.get() == TRUE;
        boolean underlined = underlinedB.get() == TRUE;
        boolean monospace = monospaceB.get() == TRUE;
        boolean serif = serifB.get() == TRUE;
        boolean smallcaps = smallcapsB.get() == TRUE;
        int pointSize = pointSizeB.get();
        int fontId = fontIdB.get();
        logger.info(String.format("  font: %s, size: %d, font id: %d, bold: %b,"
                + " italic: %b, underlined: %b, monospace: %b, serif: %b, smallcap: %b", fontName, pointSize,
                fontId, bold, italic, underlined, monospace, serif, smallcaps));
    } while (api.TessPageIteratorNext(pi, level) == TRUE);
}
/**
 * Test of ChoiceIterator.
 * <p>
 * Recognizes a rectangular region of <code>eurotext.tif</code> with
 * <code>save_blob_choices</code> enabled, then for every recognized symbol lists the
 * alternative choices and their confidences.
 *
 * @throws Exception if reading the test image fails
 */
@Test
public void testChoiceIterator() throws Exception {
    logger.info("TessResultIteratorGetChoiceIterator");
    File tiff = new File(testResourcesDataPath, "eurotext.tif");
    // Read directly from the File: ImageIO.read(InputStream) does not close the stream it
    // is handed, so wrapping the file in a FileInputStream leaked a file handle.
    BufferedImage image = ImageIO.read(tiff); // require jai-imageio lib to read TIFF
    ByteBuffer buf = ImageIOHelper.convertImageData(image);
    int bpp = image.getColorModel().getPixelSize();
    int bytespp = bpp / 8;
    int bytespl = (int) Math.ceil(image.getWidth() * bpp / 8.0);
    api.TessBaseAPIInit3(handle, datapath, language);
    api.TessBaseAPISetImage(handle, buf, image.getWidth(), image.getHeight(), bytespp, bytespl);
    api.TessBaseAPISetVariable(handle, "save_blob_choices", "T");
    api.TessBaseAPISetRectangle(handle, 37, 228, 548, 31);

    ETEXT_DESC monitor = new ETEXT_DESC();
    ProgressMonitor pmo = new ProgressMonitor(monitor);
    pmo.start();
    api.TessBaseAPIRecognize(handle, monitor);
    logger.info("Message: " + pmo.getMessage());

    TessResultIterator ri = api.TessBaseAPIGetIterator(handle);
    int level = TessPageIteratorLevel.RIL_SYMBOL;
    if (ri != null) {
        do {
            Pointer symbol = api.TessResultIteratorGetUTF8Text(ri, level);
            float conf = api.TessResultIteratorConfidence(ri, level);
            if (symbol != null) {
                logger.info(String.format("symbol %s, conf: %f", symbol.getString(0), conf));
                // The first choice gets a single tab; every later one gets an extra tab.
                boolean indent = false;
                TessChoiceIterator ci = api.TessResultIteratorGetChoiceIterator(ri);
                do {
                    if (indent) {
                        System.out.print("\t");
                    }
                    System.out.print("\t- ");
                    String choice = api.TessChoiceIteratorGetUTF8Text(ci);
                    logger.info(String.format("%s conf: %f", choice, api.TessChoiceIteratorConfidence(ci)));
                    indent = true;
                } while (api.TessChoiceIteratorNext(ci) == ITessAPI.TRUE);
                api.TessChoiceIteratorDelete(ci);
                // Release the native text only when there is something to release.
                api.TessDeleteText(symbol);
            }
            logger.info("---------------------------------------------");
        } while (api.TessResultIteratorNext(ri, level) == ITessAPI.TRUE);
    }
    assertTrue(true);
}
/**
 * Test of ResultRenderer method, of class TessAPI.
 * <p>
 * Chains hOCR, box-text, text and PDF renderers, processes <code>eurotext.tif</code>
 * through them, logs each renderer's extension, title and image number, and checks that
 * the PDF output file exists.
 *
 * @throws java.lang.Exception
 */
@Test
public void testResultRenderer() throws Exception {
    logger.info("TessResultRenderer");
    String image = String.format("%s/%s", testResourcesDataPath, "eurotext.tif");
    String output = "capi-test.txt";
    int set_only_init_params = FALSE;
    int oem = TessOcrEngineMode.OEM_DEFAULT;
    PointerByReference configs = null;
    int configs_size = 0;

    String[] params = {"load_system_dawg", "tessedit_char_whitelist"};
    String vals[] = {"F", ""}; //0123456789-.IThisalotfpnex
    PointerByReference vars_vec = new PointerByReference();
    vars_vec.setPointer(new StringArray(params));
    PointerByReference vars_values = new PointerByReference();
    vars_values.setPointer(new StringArray(vals));
    NativeSize vars_vec_size = new NativeSize(params.length);

    api.TessBaseAPISetOutputName(handle, output);
    int rc = api.TessBaseAPIInit4(handle, datapath, language,
            oem, configs, configs_size, vars_vec, vars_values, vars_vec_size, set_only_init_params);
    if (rc != 0) {
        api.TessBaseAPIDelete(handle);
        logger.error("Could not initialize tesseract.");
        return;
    }

    String outputbase = "test/test-results/outputbase";
    // Keep a reference to the head of the renderer chain so it can be released afterwards.
    TessResultRenderer renderer = api.TessHOcrRendererCreate(outputbase);
    try {
        api.TessResultRendererInsert(renderer, api.TessBoxTextRendererCreate(outputbase));
        api.TessResultRendererInsert(renderer, api.TessTextRendererCreate(outputbase));
        String dataPath = api.TessBaseAPIGetDatapath(handle);
        api.TessResultRendererInsert(renderer, api.TessPDFRendererCreate(outputbase, dataPath));

        int result = api.TessBaseAPIProcessPages(handle, image, null, 0, renderer);
        if (result == FALSE) {
            logger.error("Error during processing.");
            return;
        }

        // Walk the chain with a separate cursor. The previous code advanced the head
        // variable itself, so it was always null by the time it was passed to
        // TessDeleteResultRenderer and the renderers were never released.
        for (TessResultRenderer current = renderer; current != null;
                current = api.TessResultRendererNext(current)) {
            String ext = api.TessResultRendererExtention(current).getString(0);
            logger.info(String.format("TessResultRendererExtention: %s\nTessResultRendererTitle: %s\nTessResultRendererImageNum: %d",
                    ext,
                    api.TessResultRendererTitle(current).getString(0),
                    api.TessResultRendererImageNum(current)));
        }
    } finally {
        // Also runs on the early-return path above, which previously leaked the chain.
        api.TessDeleteResultRenderer(renderer);
    }

    assertTrue(new File(outputbase + ".pdf").exists());
}
}

View File

@@ -0,0 +1,267 @@
/**
* Copyright @ 2010 Quan Nguyen
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package net.sourceforge.tess4j;
import java.awt.Rectangle;
import java.awt.image.BufferedImage;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import java.util.Arrays;
import javax.imageio.IIOImage;
import javax.imageio.ImageIO;
import net.sourceforge.tess4j.util.LoggHelper;
import net.sourceforge.tess4j.util.Utils;
import net.sourceforge.tess4j.util.ImageHelper;
import net.sourceforge.tess4j.util.ImageIOHelper;
import net.sourceforge.tess4j.ITesseract.RenderedFormat;
import net.sourceforge.tess4j.ITessAPI.TessPageIteratorLevel;
import com.recognition.software.jdeskew.ImageDeskew;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
/**
 * Tests of the {@link ITesseract} operations as implemented by <code>Tesseract1</code>,
 * run against the sample images under <code>test/resources/test-data</code>.
 */
public class Tesseract1Test {

    private static final Logger logger = LoggerFactory.getLogger(new LoggHelper().toString());
    /** Skew angles whose magnitude is at or below this value are not corrected. */
    static final double MINIMUM_DESKEW_THRESHOLD = 0.05d;
    ITesseract instance;
    private final String datapath = ".";
    private final String testResourcesDataPath = "test/resources/test-data";
    private final String expOCRResult = "The (quick) [brown] {fox} jumps!\nOver the $43,456.78 <lazy> #90 dog";

    @BeforeClass
    public static void setUpClass() throws Exception {
    }

    @AfterClass
    public static void tearDownClass() throws Exception {
    }

    /**
     * Creates a fresh <code>Tesseract1</code> instance for every test and points it at the
     * data path.
     */
    @Before
    public void setUp() {
        instance = new Tesseract1();
        instance.setDatapath(new File(datapath).getPath());
    }

    @After
    public void tearDown() {
    }

    /**
     * Asserts that <code>actual</code> begins with <code>expected</code>.
     * <p>
     * The compared prefix is clamped to the length of <code>actual</code>, so OCR output
     * that is shorter than the expected text produces an ordinary assertion failure showing
     * both strings instead of a <code>StringIndexOutOfBoundsException</code>.
     *
     * @param expected the text the OCR result must start with
     * @param actual the full OCR result
     */
    private static void assertStartsWith(String expected, String actual) {
        int prefixLength = Math.min(expected.length(), actual.length());
        assertEquals(expected, actual.substring(0, prefixLength));
    }

    /**
     * Test of doOCR method, of class Tesseract1.
     *
     * @throws java.lang.Exception
     */
    @Test
    public void testDoOCR_File() throws Exception {
        logger.info("doOCR on a PNG image");
        File imageFile = new File(testResourcesDataPath, "eurotext.png");
        String result = instance.doOCR(imageFile);
        logger.info(result);
        assertStartsWith(expOCRResult, result);
    }

    /**
     * Test of doOCR method, of class Tesseract1, on an image with an accompanying UNLV
     * zone file.
     *
     * @throws java.lang.Exception
     */
    @Test
    public void testDoOCR_UNLV_Zone_File() throws Exception {
        logger.info("doOCR on a PNG image with UNLV zone file .uzn");
        //UNLV zone format: left top width height label
        File imageFile = new File(testResourcesDataPath, "eurotext_unlv.png");
        String expResult = "& duck/goose, as 12.5% of E-mail\n\n"
                + "from aspammer@website.com is spam.\n\n"
                + "The (quick) [brown] {fox} jumps!\n"
                + "Over the $43,456.78 <lazy> #90 dog";
        String result = instance.doOCR(imageFile);
        logger.info(result);
        assertEquals(expResult, result.trim());
    }

    /**
     * Test of doOCR method, of class Tesseract1, with the <code>digits</code> config
     * applied; the output must consist only of digits, '-', '.', spaces and newlines.
     *
     * @throws java.lang.Exception
     */
    @Test
    public void testDoOCR_File_With_Configs() throws Exception {
        logger.info("doOCR with configs");
        File imageFile = new File(testResourcesDataPath, "eurotext.png");
        String expResult = "[-0123456789.\n ]+";
        List<String> configs = Arrays.asList("digits");
        instance.setConfigs(configs);
        String result = instance.doOCR(imageFile);
        logger.info(result);
        assertTrue(result.matches(expResult));
    }

    /**
     * Test of doOCR method, of class Tesseract1.
     *
     * @throws java.lang.Exception
     */
    @Test
    public void testDoOCR_File_Rectangle() throws Exception {
        logger.info("doOCR on a BMP image with bounding rectangle");
        File imageFile = new File(testResourcesDataPath, "eurotext.bmp");
        Rectangle rect = new Rectangle(0, 0, 1024, 800); // define an equal or smaller region of interest on the image
        String result = instance.doOCR(imageFile, rect);
        logger.info(result);
        assertStartsWith(expOCRResult, result);
    }

    /**
     * Test of doOCR method, of class Tesseract1.
     *
     * @throws java.lang.Exception
     */
    @Test
    public void testDoOCR_PDF() throws Exception {
        logger.info("doOCR on a PDF document");
        File imageFile = new File(testResourcesDataPath, "eurotext.pdf");
        List<IIOImage> imageList = ImageIOHelper.getIIOImageList(imageFile);
        String result = instance.doOCR(imageList, null);
        logger.info(result);
        assertStartsWith(expOCRResult, result);
    }

    /**
     * Test of doOCR method, of class Tesseract1.
     *
     * @throws java.lang.Exception
     */
    @Test
    public void testDoOCR_BufferedImage() throws Exception {
        logger.info("doOCR on a buffered image of a PNG");
        File imageFile = new File(testResourcesDataPath, "eurotext.png");
        BufferedImage bi = ImageIO.read(imageFile);
        String result = instance.doOCR(bi);
        logger.info(result);
        assertStartsWith(expOCRResult, result);
    }

    /**
     * Test of deskew algorithm.
     *
     * @throws java.lang.Exception
     */
    @Test
    public void testDoOCR_SkewedImage() throws Exception {
        logger.info("doOCR on a skewed PNG image");
        File imageFile = new File(testResourcesDataPath, "eurotext_deskew.png");
        BufferedImage bi = ImageIO.read(imageFile);
        ImageDeskew id = new ImageDeskew(bi);
        double imageSkewAngle = id.getSkewAngle(); // determine skew angle
        // Rotate only when the skew exceeds the threshold in either direction.
        if ((imageSkewAngle > MINIMUM_DESKEW_THRESHOLD || imageSkewAngle < -(MINIMUM_DESKEW_THRESHOLD))) {
            bi = ImageHelper.rotateImage(bi, -imageSkewAngle); // deskew image
        }
        String result = instance.doOCR(bi);
        logger.info(result);
        assertStartsWith(expOCRResult, result);
    }

    /**
     * Test of createDocuments method, of class Tesseract1.
     *
     * @throws java.lang.Exception
     */
    @Test
    public void testCreateDocuments() throws Exception {
        logger.info("createDocuments for an image");
        File imageFile1 = new File(testResourcesDataPath, "eurotext.pdf");
        File imageFile2 = new File(testResourcesDataPath, "eurotext.png");
        String outputbase1 = "test/test-results/docrenderer1-1";
        String outputbase2 = "test/test-results/docrenderer1-2";
        List<RenderedFormat> formats = new ArrayList<RenderedFormat>(Arrays.asList(RenderedFormat.HOCR, RenderedFormat.PDF, RenderedFormat.TEXT));
        instance.createDocuments(new String[]{imageFile1.getPath(), imageFile2.getPath()}, new String[]{outputbase1, outputbase2}, formats);
        assertTrue(new File(outputbase1 + ".pdf").exists());
    }

    /**
     * Test of getWords method, of class Tesseract1.
     *
     * @throws java.lang.Exception
     */
    @Test
    public void testGetWords() throws Exception {
        logger.info("getWords");
        File imageFile = new File(testResourcesDataPath, "eurotext.tif");
        String expResult = "The (quick) [brown] {fox} jumps!\nOver the $43,456.78 <lazy> #90 dog";
        String[] expResults = expResult.split("\\s");
        int pageIteratorLevel = TessPageIteratorLevel.RIL_WORD;
        logger.info("PageIteratorLevel: " + Utils.getConstantName(pageIteratorLevel, TessPageIteratorLevel.class));
        BufferedImage bi = ImageIO.read(imageFile);
        List<Word> result = instance.getWords(bi, pageIteratorLevel);

        // print the complete result
        for (Word word : result) {
            logger.info(word.toString());
        }

        // Compare only the leading words. The count is clamped so that too few recognized
        // words surface as an array-length assertion failure rather than an
        // IndexOutOfBoundsException from subList.
        int wordCount = Math.min(expResults.length, result.size());
        List<String> text = new ArrayList<String>();
        for (Word word : result.subList(0, wordCount)) {
            text.add(word.getText());
        }
        assertArrayEquals(expResults, text.toArray());
    }

    /**
     * Test of getSegmentedRegions method, of class Tesseract1.
     *
     * @throws java.lang.Exception
     */
    @Test
    public void testGetSegmentedRegions() throws Exception {
        logger.info("getSegmentedRegions at given TessPageIteratorLevel");
        File imageFile = new File(testResourcesDataPath, "eurotext.png");
        BufferedImage bi = ImageIO.read(imageFile);
        int level = TessPageIteratorLevel.RIL_SYMBOL;
        logger.info("PageIteratorLevel: " + Utils.getConstantName(level, TessPageIteratorLevel.class));
        List<Rectangle> result = instance.getSegmentedRegions(bi, level);
        for (int i = 0; i < result.size(); i++) {
            Rectangle rect = result.get(i);
            logger.info(String.format("Box[%d]: x=%d, y=%d, w=%d, h=%d", i, rect.x, rect.y, rect.width, rect.height));
        }
        assertTrue(result.size() > 0);
    }
}

View File

@@ -0,0 +1,267 @@
/**
* Copyright @ 2010 Quan Nguyen
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package net.sourceforge.tess4j;
import java.awt.Rectangle;
import java.awt.image.BufferedImage;
import java.io.File;
import java.util.List;
import java.util.ArrayList;
import java.util.Arrays;
import javax.imageio.IIOImage;
import javax.imageio.ImageIO;
import net.sourceforge.tess4j.util.ImageHelper;
import net.sourceforge.tess4j.util.ImageIOHelper;
import net.sourceforge.tess4j.util.LoggHelper;
import net.sourceforge.tess4j.util.Utils;
import net.sourceforge.tess4j.ITesseract.RenderedFormat;
import net.sourceforge.tess4j.ITessAPI.TessPageIteratorLevel;
import static org.junit.Assert.*;
import com.recognition.software.jdeskew.ImageDeskew;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Tests of the {@link ITesseract} operations as implemented by <code>Tesseract</code>,
 * run against the sample images under <code>test/resources/test-data</code>.
 */
public class TesseractTest {

    private static final Logger logger = LoggerFactory.getLogger(new LoggHelper().toString());
    /** Skew angles whose magnitude is at or below this value are not corrected. */
    static final double MINIMUM_DESKEW_THRESHOLD = 0.05d;
    ITesseract instance;
    private final String datapath = ".";
    private final String testResourcesDataPath = "test/resources/test-data";
    private final String expOCRResult = "The (quick) [brown] {fox} jumps!\nOver the $43,456.78 <lazy> #90 dog";

    @BeforeClass
    public static void setUpClass() throws Exception {
    }

    @AfterClass
    public static void tearDownClass() throws Exception {
    }

    /**
     * Creates a fresh <code>Tesseract</code> instance for every test and points it at the
     * data path.
     */
    @Before
    public void setUp() {
        instance = new Tesseract();
        instance.setDatapath(new File(datapath).getPath());
    }

    @After
    public void tearDown() {
    }

    /**
     * Asserts that <code>actual</code> begins with <code>expected</code>.
     * <p>
     * The compared prefix is clamped to the length of <code>actual</code>, so OCR output
     * that is shorter than the expected text produces an ordinary assertion failure showing
     * both strings instead of a <code>StringIndexOutOfBoundsException</code>.
     *
     * @param expected the text the OCR result must start with
     * @param actual the full OCR result
     */
    private static void assertStartsWith(String expected, String actual) {
        int prefixLength = Math.min(expected.length(), actual.length());
        assertEquals(expected, actual.substring(0, prefixLength));
    }

    /**
     * Test of doOCR method, of class Tesseract.
     *
     * @throws java.lang.Exception
     */
    @Test
    public void testDoOCR_File() throws Exception {
        logger.info("doOCR on a PNG image");
        File imageFile = new File(testResourcesDataPath, "eurotext.png");
        String result = instance.doOCR(imageFile);
        logger.info(result);
        assertStartsWith(expOCRResult, result);
    }

    /**
     * Test of doOCR method, of class Tesseract, on an image with an accompanying UNLV
     * zone file.
     *
     * @throws java.lang.Exception
     */
    @Test
    public void testDoOCR_UNLV_Zone_File() throws Exception {
        logger.info("doOCR on a PNG image with UNLV zone file .uzn");
        //UNLV zone format: left top width height label
        File imageFile = new File(testResourcesDataPath, "eurotext_unlv.png");
        String expResult = "& duck/goose, as 12.5% of E-mail\n\n"
                + "from aspammer@website.com is spam.\n\n"
                + "The (quick) [brown] {fox} jumps!\n"
                + "Over the $43,456.78 <lazy> #90 dog";
        String result = instance.doOCR(imageFile);
        logger.info(result);
        assertEquals(expResult, result.trim());
    }

    /**
     * Test of doOCR method, of class Tesseract, with the <code>digits</code> config
     * applied; the output must consist only of digits, '-', '.', spaces and newlines.
     *
     * @throws java.lang.Exception
     */
    @Test
    public void testDoOCR_File_With_Configs() throws Exception {
        logger.info("doOCR with configs");
        File imageFile = new File(testResourcesDataPath, "eurotext.png");
        String expResult = "[-0123456789.\n ]+";
        List<String> configs = Arrays.asList("digits");
        instance.setConfigs(configs);
        try {
            String result = instance.doOCR(imageFile);
            logger.info(result);
            assertTrue(result.matches(expResult));
        } finally {
            // Defensive reset: setUp() builds a new instance for each test, but clearing
            // the configs here (even when the assertion fails) keeps them from carrying
            // over should the instance ever be shared.
            instance.setConfigs(null);
        }
    }

    /**
     * Test of doOCR method, of class Tesseract.
     *
     * @throws java.lang.Exception
     */
    @Test
    public void testDoOCR_File_Rectangle() throws Exception {
        logger.info("doOCR on a BMP image with bounding rectangle");
        File imageFile = new File(testResourcesDataPath, "eurotext.bmp");
        Rectangle rect = new Rectangle(0, 0, 1024, 800); // define an equal or smaller region of interest on the image
        String result = instance.doOCR(imageFile, rect);
        logger.info(result);
        assertStartsWith(expOCRResult, result);
    }

    /**
     * Test of doOCR method, of class Tesseract.
     *
     * @throws java.lang.Exception
     */
    @Test
    public void testDoOCR_PDF() throws Exception {
        logger.info("doOCR on a PDF document");
        File imageFile = new File(testResourcesDataPath, "eurotext.pdf");
        List<IIOImage> imageList = ImageIOHelper.getIIOImageList(imageFile);
        String result = instance.doOCR(imageList, null);
        logger.info(result);
        assertStartsWith(expOCRResult, result);
    }

    /**
     * Test of doOCR method, of class Tesseract.
     *
     * @throws java.lang.Exception
     */
    @Test
    public void testDoOCR_BufferedImage() throws Exception {
        logger.info("doOCR on a buffered image of a PNG");
        File imageFile = new File(testResourcesDataPath, "eurotext.png");
        BufferedImage bi = ImageIO.read(imageFile);
        String result = instance.doOCR(bi);
        logger.info(result);
        assertStartsWith(expOCRResult, result);
    }

    /**
     * Test of deskew algorithm.
     *
     * @throws java.lang.Exception
     */
    @Test
    public void testDoOCR_SkewedImage() throws Exception {
        logger.info("doOCR on a skewed PNG image");
        File imageFile = new File(testResourcesDataPath, "eurotext_deskew.png");
        BufferedImage bi = ImageIO.read(imageFile);
        ImageDeskew id = new ImageDeskew(bi);
        double imageSkewAngle = id.getSkewAngle(); // determine skew angle
        // Rotate only when the skew exceeds the threshold in either direction.
        if ((imageSkewAngle > MINIMUM_DESKEW_THRESHOLD || imageSkewAngle < -(MINIMUM_DESKEW_THRESHOLD))) {
            bi = ImageHelper.rotateImage(bi, -imageSkewAngle); // deskew image
        }
        String result = instance.doOCR(bi);
        logger.info(result);
        assertStartsWith(expOCRResult, result);
    }

    /**
     * Test of createDocuments method, of class Tesseract.
     *
     * @throws java.lang.Exception
     */
    @Test
    public void testCreateDocuments() throws Exception {
        logger.info("createDocuments for multiple images");
        File imageFile1 = new File(testResourcesDataPath, "eurotext.pdf");
        File imageFile2 = new File(testResourcesDataPath, "eurotext.png");
        String outputbase1 = "test/test-results/docrenderer-1";
        String outputbase2 = "test/test-results/docrenderer-2";
        List<RenderedFormat> formats = new ArrayList<RenderedFormat>(Arrays.asList(RenderedFormat.HOCR, RenderedFormat.PDF, RenderedFormat.TEXT));
        instance.createDocuments(new String[]{imageFile1.getPath(), imageFile2.getPath()}, new String[]{outputbase1, outputbase2}, formats);
        assertTrue(new File(outputbase1 + ".pdf").exists());
    }

    /**
     * Test of getWords method, of class Tesseract.
     *
     * @throws java.lang.Exception
     */
    @Test
    public void testGetWords() throws Exception {
        logger.info("getWords");
        File imageFile = new File(testResourcesDataPath, "eurotext.tif");
        String expResult = "The (quick) [brown] {fox} jumps!\nOver the $43,456.78 <lazy> #90 dog";
        String[] expResults = expResult.split("\\s");
        int pageIteratorLevel = TessPageIteratorLevel.RIL_WORD;
        logger.info("PageIteratorLevel: " + Utils.getConstantName(pageIteratorLevel, TessPageIteratorLevel.class));
        BufferedImage bi = ImageIO.read(imageFile);
        List<Word> result = instance.getWords(bi, pageIteratorLevel);

        //print the complete result
        for (Word word : result) {
            logger.info(word.toString());
        }

        // Compare only the leading words. The count is clamped so that too few recognized
        // words surface as an array-length assertion failure rather than an
        // IndexOutOfBoundsException from subList.
        int wordCount = Math.min(expResults.length, result.size());
        List<String> text = new ArrayList<String>();
        for (Word word : result.subList(0, wordCount)) {
            text.add(word.getText());
        }
        assertArrayEquals(expResults, text.toArray());
    }

    /**
     * Test of getSegmentedRegions method, of class Tesseract.
     *
     * @throws java.lang.Exception
     */
    @Test
    public void testGetSegmentedRegions() throws Exception {
        logger.info("getSegmentedRegions at given TessPageIteratorLevel");
        File imageFile = new File(testResourcesDataPath, "eurotext.png");
        BufferedImage bi = ImageIO.read(imageFile);
        int level = TessPageIteratorLevel.RIL_SYMBOL;
        logger.info("PageIteratorLevel: " + Utils.getConstantName(level, TessPageIteratorLevel.class));
        List<Rectangle> result = instance.getSegmentedRegions(bi, level);
        for (int i = 0; i < result.size(); i++) {
            Rectangle rect = result.get(i);
            logger.info(String.format("Box[%d]: x=%d, y=%d, w=%d, h=%d", i, rect.x, rect.y, rect.width, rect.height));
        }
        assertTrue(result.size() > 0);
    }
}

View File

@@ -0,0 +1,84 @@
/**
* Copyright @ 2008 Quan Nguyen
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package net.sourceforge.tess4j;
import static org.junit.Assert.assertTrue;
import java.io.File;
import java.net.URISyntaxException;
import java.net.URL;
import net.sourceforge.tess4j.util.LoadLibs;
import net.sourceforge.tess4j.util.LoggHelper;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Checks that the <code>tessdata</code> resources can be extracted with
 * {@link LoadLibs#extractTessResources(String)} and used as the data path for OCR.
 */
public class TestFolderExtraction {

    private static final Logger logger = LoggerFactory.getLogger(new LoggHelper().toString());

    /**
     * Extracts the <code>tessdata</code> folder, runs OCR on a sample PDF with it, and
     * asserts that the extracted folder exists. OCR and URI errors are logged rather than
     * rethrown; only the folder extraction is asserted.
     */
    @Test
    public void testFolderExtraction() {
        File tessDataFolder = null;

        try {
            // Loads the image from resources.
            String filename = String.format("%s/%s", "/test-data", "eurotext.pdf");
            URL defaultImage = getClass().getResource(filename);
            // getResource returns null for a missing resource; report that explicitly
            // instead of failing with a NullPointerException on toURI().
            assertTrue("Test resource not found on classpath: " + filename, defaultImage != null);
            File imageFile = new File(defaultImage.toURI());

            // Extracts the tessdata folder into a temp folder.
            logger.info("Loading the tessdata folder into a temporary folder.");
            tessDataFolder = LoadLibs.extractTessResources("tessdata");

            // Gets tesseract instance and sets data path.
            ITesseract instance = new Tesseract();
            if (tessDataFolder != null) {
                logger.info(tessDataFolder.getAbsolutePath());
                instance.setDatapath(tessDataFolder.getParent());
            }

            // Performs OCR on the image.
            String result = instance.doOCR(imageFile);
            logger.info(result);
        } catch (TesseractException e) {
            // Logged once, with the stack trace (the message used to be logged twice).
            logger.error(e.getMessage(), e);
        } catch (URISyntaxException e) {
            logger.error(e.getMessage(), e);
        }

        // checks if tessdata folder exists
        assertTrue(tessDataFolder != null && tessDataFolder.exists());
    }
}

View File

@@ -0,0 +1,128 @@
/*
* Copyright 2014 Quan Nguyen.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package net.sourceforge.tess4j.util;
import java.io.File;
import java.io.IOException;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import static org.junit.Assert.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Tests of the PDF conversion, split, merge and page-count helpers in
 * {@link PdfUtilities}, run with the PDFBox library selected.
 */
public class PdfUtilitiesTest {

    private static final Logger logger = LoggerFactory.getLogger(new LoggHelper().toString());
    private static final String TEST_RESOURCES_DATA_PATH = "test/resources/test-data";

    /** Value of the PDF library system property before a test changed it; may be null. */
    private String previousPdfLibrary;

    @BeforeClass
    public static void setUpClass() {
    }

    @AfterClass
    public static void tearDownClass() {
    }

    /**
     * Selects PDFBox for the test, remembering the prior setting so that
     * {@link #tearDown()} can put it back.
     */
    @Before
    public void setUp() {
        previousPdfLibrary = System.getProperty(PdfUtilities.PDF_LIBRARY);
        System.setProperty(PdfUtilities.PDF_LIBRARY, PdfUtilities.PDFBOX); // Note: comment out to test Ghostscript
    }

    /**
     * Restores the PDF library system property. System properties are JVM-wide, so leaving
     * the override in place would leak into tests that run later in the same JVM.
     */
    @After
    public void tearDown() {
        if (previousPdfLibrary == null) {
            System.clearProperty(PdfUtilities.PDF_LIBRARY);
        } else {
            System.setProperty(PdfUtilities.PDF_LIBRARY, previousPdfLibrary);
        }
    }

    /**
     * Test of convertPdf2Tiff method, of class PdfUtilities.
     *
     * @throws java.lang.Exception
     */
    @Test
    public void testConvertPdf2Tiff() throws Exception {
        logger.info("convertPdf2Tiff");
        File inputPdfFile = new File(TEST_RESOURCES_DATA_PATH, "eurotext.pdf");
        File result = PdfUtilities.convertPdf2Tiff(inputPdfFile);
        result.deleteOnExit();
        assertTrue(result.exists());
    }

    /**
     * Test of convertPdf2Png method, of class PdfUtilities.
     *
     * @throws java.io.IOException
     */
    @Test
    public void testConvertPdf2Png() throws IOException {
        logger.info("convertPdf2Png");
        File inputPdfFile = new File(TEST_RESOURCES_DATA_PATH, "eurotext.pdf");
        File[] results = PdfUtilities.convertPdf2Png(inputPdfFile);
        assertTrue(results.length > 0);

        //clean up: remove the generated files, then the directory that held them
        File parentDir = results[0].getParentFile();
        for (File result : results) {
            result.delete();
        }
        parentDir.delete();
    }

    /**
     * Test of splitPdf method, of class PdfUtilities: extracting pages 2 to 3 must yield a
     * two-page document.
     */
    @Test
    public void testSplitPdf() {
        logger.info("splitPdf");
        File inputPdfFile = new File(TEST_RESOURCES_DATA_PATH, "multipage-pdf.pdf");
        File outputPdfFile = new File("test/test-results/multipage-pdf_splitted.pdf");
        int startPage = 2;
        int endPage = 3;
        int expResult = 2;
        PdfUtilities.splitPdf(inputPdfFile, outputPdfFile, startPage, endPage);
        int pageCount = PdfUtilities.getPdfPageCount(outputPdfFile);
        assertEquals(expResult, pageCount);
    }

    /**
     * Test of getPdfPageCount method, of class PdfUtilities.
     */
    @Test
    public void testGetPdfPageCount() {
        logger.info("getPdfPageCount");
        File inputPdfFile = new File(TEST_RESOURCES_DATA_PATH, "multipage-pdf.pdf");
        int expResult = 5;
        int result = PdfUtilities.getPdfPageCount(inputPdfFile);
        assertEquals(expResult, result);
    }

    /**
     * Test of mergePdf method, of class PdfUtilities: merging the two sample documents
     * must yield six pages.
     */
    @Test
    public void testMergePdf() {
        logger.info("mergePdf");
        File pdfPartOne = new File(TEST_RESOURCES_DATA_PATH, "eurotext.pdf");
        File pdfPartTwo = new File(TEST_RESOURCES_DATA_PATH, "multipage-pdf.pdf");
        int expResult = 6;
        File outputPdfFile = new File("test/test-results", "multipage-pdf_merged.pdf");
        File[] inputPdfFiles = {pdfPartOne, pdfPartTwo};
        PdfUtilities.mergePdf(inputPdfFiles, outputPdfFile);
        assertEquals(expResult, PdfUtilities.getPdfPageCount(outputPdfFile));
    }
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 100 KiB

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 200 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

View File

@@ -0,0 +1,3 @@
97 162 747 50 ThirdLine
97 209 828 55 FourthLine
92 56 810 107 First2Lines

Binary file not shown.