fix bugs
This commit is contained in:
9
NGCC/Tess4J/test/log4j.properties
Normal file
9
NGCC/Tess4J/test/log4j.properties
Normal file
@@ -0,0 +1,9 @@
|
||||
# Set root logger level to DEBUG and its only appender to A1.
|
||||
log4j.rootLogger=DEBUG, A1
|
||||
|
||||
# A1 is set to be a ConsoleAppender.
|
||||
log4j.appender.A1=org.apache.log4j.ConsoleAppender
|
||||
|
||||
# A1 uses PatternLayout.
|
||||
log4j.appender.A1.layout=org.apache.log4j.PatternLayout
|
||||
log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
|
75
NGCC/Tess4J/test/net/sourceforge/tess4j/ProgressMonitor.java
Normal file
75
NGCC/Tess4J/test/net/sourceforge/tess4j/ProgressMonitor.java
Normal file
@@ -0,0 +1,75 @@
|
||||
/**
|
||||
* Copyright @ 2014 Quan Nguyen
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||
* use this file except in compliance with the License. You may obtain a copy of
|
||||
* the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations under
|
||||
* the License.
|
||||
*/
|
||||
package net.sourceforge.tess4j;
|
||||
|
||||
import com.sun.jna.Pointer;
|
||||
import net.sourceforge.tess4j.util.LoggHelper;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static net.sourceforge.tess4j.ITessAPI.TRUE;
|
||||
|
||||
class ProgressMonitor extends Thread {
|
||||
|
||||
ITessAPI.ETEXT_DESC monitor;
|
||||
StringBuilder outputMessage = new StringBuilder();
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(new LoggHelper().toString());
|
||||
|
||||
public ProgressMonitor(ITessAPI.ETEXT_DESC monitor) {
|
||||
this.monitor = monitor;
|
||||
}
|
||||
|
||||
public String getMessage() {
|
||||
return outputMessage.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
try {
|
||||
while (true) {
|
||||
logger.info("ocr alive: " + (monitor.ocr_alive == TRUE));
|
||||
logger.info("progress: " + monitor.progress);
|
||||
outputMessage.append(monitor.more_to_come);
|
||||
if (monitor.progress >= 100) {
|
||||
break;
|
||||
}
|
||||
Thread.sleep(100);
|
||||
}
|
||||
} catch (Exception ioe) {
|
||||
ioe.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Cancels OCR operation.
|
||||
*/
|
||||
public void cancel() {
|
||||
monitor.cancel = new ITessAPI.CANCEL_FUNC() {
|
||||
@Override
|
||||
public boolean invoke(Pointer cancel_this, int words) {
|
||||
return true;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Resets cancel flag.
|
||||
*/
|
||||
public void reset() {
|
||||
monitor.cancel = null;
|
||||
}
|
||||
}
|
645
NGCC/Tess4J/test/net/sourceforge/tess4j/TessAPI1Test.java
Normal file
645
NGCC/Tess4J/test/net/sourceforge/tess4j/TessAPI1Test.java
Normal file
@@ -0,0 +1,645 @@
|
||||
/**
|
||||
* Copyright @ 2012 Quan Nguyen
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||
* use this file except in compliance with the License. You may obtain a copy of
|
||||
* the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations under
|
||||
* the License.
|
||||
*/
|
||||
package net.sourceforge.tess4j;
|
||||
|
||||
import static org.junit.Assert.assertArrayEquals;
|
||||
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.io.BufferedReader;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileReader;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.FloatBuffer;
|
||||
import java.nio.IntBuffer;
|
||||
import java.util.Arrays;
|
||||
|
||||
import javax.imageio.ImageIO;
|
||||
|
||||
import net.sourceforge.tess4j.util.LoggHelper;
|
||||
import net.sourceforge.tess4j.util.Utils;
|
||||
import net.sourceforge.tess4j.util.ImageIOHelper;
|
||||
|
||||
import com.ochafik.lang.jnaerator.runtime.NativeSize;
|
||||
import com.sun.jna.NativeLong;
|
||||
|
||||
import org.junit.After;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.Before;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
import com.sun.jna.Pointer;
|
||||
import com.sun.jna.StringArray;
|
||||
import com.sun.jna.ptr.PointerByReference;
|
||||
import net.sourceforge.lept4j.Box;
|
||||
import net.sourceforge.lept4j.Boxa;
|
||||
import static net.sourceforge.lept4j.ILeptonica.L_CLONE;
|
||||
import net.sourceforge.lept4j.Leptonica;
|
||||
import net.sourceforge.lept4j.Leptonica1;
|
||||
import net.sourceforge.lept4j.Pix;
|
||||
import net.sourceforge.lept4j.util.LeptUtils;
|
||||
|
||||
import net.sourceforge.tess4j.ITessAPI.*;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static net.sourceforge.tess4j.ITessAPI.FALSE;
|
||||
import static net.sourceforge.tess4j.ITessAPI.TRUE;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
public class TessAPI1Test {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(new LoggHelper().toString());
|
||||
private final String datapath = ".";
|
||||
private final String testResourcesDataPath = "test/resources/test-data";
|
||||
String language = "eng";
|
||||
String expOCRResult = "The (quick) [brown] {fox} jumps!\nOver the $43,456.78 <lazy> #90 dog";
|
||||
|
||||
TessBaseAPI handle;
|
||||
|
||||
@BeforeClass
|
||||
public static void setUpClass() throws Exception {
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
public static void tearDownClass() throws Exception {
|
||||
}
|
||||
|
||||
@Before
|
||||
public void setUp() {
|
||||
handle = TessAPI1.TessBaseAPICreate();
|
||||
}
|
||||
|
||||
@After
|
||||
public void tearDown() {
|
||||
TessAPI1.TessBaseAPIDelete(handle);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of TessBaseAPIRect method, of class TessAPI1.
|
||||
*
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@Test
|
||||
public void testTessBaseAPIRect() throws Exception {
|
||||
logger.info("TessBaseAPIRect");
|
||||
String expResult = expOCRResult;
|
||||
File tiff = new File(testResourcesDataPath, "eurotext.tif");
|
||||
BufferedImage image = ImageIO.read(tiff); // require jai-imageio lib to read TIFF
|
||||
ByteBuffer buf = ImageIOHelper.convertImageData(image);
|
||||
int bpp = image.getColorModel().getPixelSize();
|
||||
int bytespp = bpp / 8;
|
||||
int bytespl = (int) Math.ceil(image.getWidth() * bpp / 8.0);
|
||||
TessAPI1.TessBaseAPIInit3(handle, datapath, language);
|
||||
TessAPI1.TessBaseAPISetPageSegMode(handle, TessPageSegMode.PSM_AUTO);
|
||||
Pointer utf8Text = TessAPI1.TessBaseAPIRect(handle, buf, bytespp, bytespl, 0, 0, image.getWidth(), image.getHeight());
|
||||
String result = utf8Text.getString(0);
|
||||
TessAPI1.TessDeleteText(utf8Text);
|
||||
logger.info(result);
|
||||
assertEquals(expResult, result.substring(0, expResult.length()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of TessBaseAPIGetUTF8Text method, of class TessAPI1.
|
||||
*
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@Test
|
||||
public void testTessBaseAPIGetUTF8Text() throws Exception {
|
||||
logger.info("TessBaseAPIGetUTF8Text");
|
||||
String expResult = expOCRResult;
|
||||
File tiff = new File(testResourcesDataPath, "eurotext.tif");
|
||||
BufferedImage image = ImageIO.read(new FileInputStream(tiff)); // require jai-imageio lib to read TIFF
|
||||
ByteBuffer buf = ImageIOHelper.convertImageData(image);
|
||||
int bpp = image.getColorModel().getPixelSize();
|
||||
int bytespp = bpp / 8;
|
||||
int bytespl = (int) Math.ceil(image.getWidth() * bpp / 8.0);
|
||||
TessAPI1.TessBaseAPIInit3(handle, datapath, language);
|
||||
TessAPI1.TessBaseAPISetPageSegMode(handle, TessPageSegMode.PSM_AUTO);
|
||||
TessAPI1.TessBaseAPISetImage(handle, buf, image.getWidth(), image.getHeight(), bytespp, bytespl);
|
||||
TessAPI1.TessBaseAPISetRectangle(handle, 0, 0, 1024, 800);
|
||||
Pointer utf8Text = TessAPI1.TessBaseAPIGetUTF8Text(handle);
|
||||
String result = utf8Text.getString(0);
|
||||
TessAPI1.TessDeleteText(utf8Text);
|
||||
logger.info(result);
|
||||
assertEquals(expResult, result.substring(0, expResult.length()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of TessBaseAPIGetUTF8Text method, of class TessAPI1.
|
||||
*
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@Test
|
||||
public void testTessBaseAPIGetUTF8Text_Pix() throws Exception {
|
||||
logger.info("TessBaseAPIGetUTF8Text_Pix");
|
||||
String expResult = expOCRResult;
|
||||
File tiff = new File(testResourcesDataPath, "eurotext.tif");
|
||||
Leptonica leptInstance = Leptonica.INSTANCE;
|
||||
Pix pix = leptInstance.pixRead(tiff.getPath());
|
||||
TessAPI1.TessBaseAPIInit3(handle, datapath, language);
|
||||
TessAPI1.TessBaseAPISetImage2(handle, pix);
|
||||
Pointer utf8Text = TessAPI1.TessBaseAPIGetUTF8Text(handle);
|
||||
String result = utf8Text.getString(0);
|
||||
TessAPI1.TessDeleteText(utf8Text);
|
||||
logger.info(result);
|
||||
|
||||
//release Pix resource
|
||||
PointerByReference pRef = new PointerByReference();
|
||||
pRef.setValue(pix.getPointer());
|
||||
leptInstance.pixDestroy(pRef);
|
||||
|
||||
assertEquals(expResult, result.substring(0, expResult.length()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of TessBaseAPIGetComponentImages method, of class TessAPI1.
|
||||
*
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@Test
|
||||
public void testTessBaseAPIGetComponentImages() throws Exception {
|
||||
logger.info("TessBaseAPIGetComponentImages");
|
||||
File image = new File(testResourcesDataPath, "eurotext.png");
|
||||
int expResult = 12; // number of lines in the test image
|
||||
Pix pix = Leptonica1.pixRead(image.getPath());
|
||||
TessAPI1.TessBaseAPIInit3(handle, datapath, language);
|
||||
TessAPI1.TessBaseAPISetImage2(handle, pix);
|
||||
PointerByReference pixa = null;
|
||||
PointerByReference blockids = null;
|
||||
Boxa boxes = TessAPI1.TessBaseAPIGetComponentImages(handle, TessPageIteratorLevel.RIL_TEXTLINE, TRUE, pixa, blockids);
|
||||
// boxes = TessAPI1.TessBaseAPIGetRegions(handle, pixa); // equivalent to TessPageIteratorLevel.RIL_BLOCK
|
||||
int boxCount = Leptonica1.boxaGetCount(boxes);
|
||||
for (int i = 0; i < boxCount; i++) {
|
||||
Box box = Leptonica1.boxaGetBox(boxes, i, L_CLONE);
|
||||
if (box == null) {
|
||||
continue;
|
||||
}
|
||||
TessAPI1.TessBaseAPISetRectangle(handle, box.x, box.y, box.w, box.h);
|
||||
Pointer utf8Text = TessAPI1.TessBaseAPIGetUTF8Text(handle);
|
||||
String ocrResult = utf8Text.getString(0);
|
||||
TessAPI1.TessDeleteText(utf8Text);
|
||||
int conf = TessAPI1.TessBaseAPIMeanTextConf(handle);
|
||||
System.out.print(String.format("Box[%d]: x=%d, y=%d, w=%d, h=%d, confidence: %d, text: %s", i, box.x, box.y, box.w, box.h, conf, ocrResult));
|
||||
LeptUtils.dispose(box);
|
||||
}
|
||||
|
||||
// release Pix and Boxa resources
|
||||
LeptUtils.dispose(pix);
|
||||
LeptUtils.dispose(boxes);
|
||||
|
||||
assertEquals(expResult, boxCount);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of TessVersion method, of class TessAPI1.
|
||||
*/
|
||||
@Test
|
||||
public void testTessVersion() {
|
||||
logger.info("TessVersion");
|
||||
String expResult = "3.05.01";
|
||||
String result = TessAPI1.TessVersion();
|
||||
logger.info(result);
|
||||
assertTrue(result.startsWith(expResult));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of TessBaseAPIGetBoolVariable method, of class TessAPI1.
|
||||
*/
|
||||
@Test
|
||||
public void testTessBaseAPIGetBoolVariable() {
|
||||
logger.info("TessBaseAPIGetBoolVariable");
|
||||
String name = "tessedit_create_hocr";
|
||||
TessAPI1.TessBaseAPISetVariable(handle, name, "1");
|
||||
IntBuffer value = IntBuffer.allocate(1);
|
||||
int result = -1;
|
||||
if (TessAPI1.TessBaseAPIGetBoolVariable(handle, "tessedit_create_hocr", value) == TRUE) {
|
||||
result = value.get(0);
|
||||
}
|
||||
int expResult = 1;
|
||||
assertEquals(expResult, result);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of TessBaseAPIPrintVariables method, of class TessAPI1.
|
||||
*
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@Test
|
||||
public void testTessBaseAPIPrintVariablesToFile() throws Exception {
|
||||
logger.info("TessBaseAPIPrintVariablesToFile");
|
||||
String var = "tessedit_char_whitelist";
|
||||
String value = "0123456789";
|
||||
TessAPI1.TessBaseAPISetVariable(handle, var, value);
|
||||
String filename = "printvar.txt";
|
||||
TessAPI1.TessBaseAPIPrintVariablesToFile(handle, filename); // will crash if not invoked after some method
|
||||
File file = new File(filename);
|
||||
BufferedReader input = new BufferedReader(new FileReader(file));
|
||||
StringBuilder strB = new StringBuilder();
|
||||
String line;
|
||||
String EOL = System.getProperty("line.separator");
|
||||
while ((line = input.readLine()) != null) {
|
||||
strB.append(line).append(EOL);
|
||||
}
|
||||
input.close();
|
||||
file.delete();
|
||||
assertTrue(strB.toString().contains(var + "\t" + value));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of TessBaseAPIInit4 method, of class TessAPI1.
|
||||
*/
|
||||
@Test
|
||||
public void testTessBaseAPIInit4() {
|
||||
logger.info("TessBaseAPIInit4");
|
||||
int oem = TessOcrEngineMode.OEM_DEFAULT;
|
||||
PointerByReference configs = null;
|
||||
int configs_size = 0;
|
||||
|
||||
// disable loading dictionaries
|
||||
String[] args = new String[]{"load_system_dawg", "load_freq_dawg"};
|
||||
StringArray sarray = new StringArray(args);
|
||||
PointerByReference vars_vec = new PointerByReference();
|
||||
vars_vec.setPointer(sarray);
|
||||
|
||||
args = new String[]{"F", "F"};
|
||||
sarray = new StringArray(args);
|
||||
PointerByReference vars_values = new PointerByReference();
|
||||
vars_values.setPointer(sarray);
|
||||
|
||||
NativeSize vars_vec_size = new NativeSize(args.length);
|
||||
|
||||
int expResult = 0;
|
||||
int result = TessAPI1.TessBaseAPIInit4(handle, datapath, language, oem, configs, configs_size, vars_vec, vars_values, vars_vec_size, FALSE);
|
||||
assertEquals(expResult, result);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of TessBaseAPIGetInitLanguagesAsString method, of class TessAPI1.
|
||||
*/
|
||||
@Test
|
||||
public void testTessBaseAPIGetInitLanguagesAsString() {
|
||||
logger.info("TessBaseAPIGetInitLanguagesAsString");
|
||||
String expResult = "";
|
||||
String result = TessAPI1.TessBaseAPIGetInitLanguagesAsString(handle);
|
||||
assertEquals(expResult, result);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of TessBaseAPIGetLoadedLanguagesAsVector method, of class TessAPI1.
|
||||
*/
|
||||
@Test
|
||||
public void testTessBaseAPIGetLoadedLanguagesAsVector() {
|
||||
logger.info("TessBaseAPIGetLoadedLanguagesAsVector");
|
||||
TessAPI1.TessBaseAPIInit3(handle, datapath, language);
|
||||
String[] expResult = {"eng"};
|
||||
String[] result = TessAPI1.TessBaseAPIGetLoadedLanguagesAsVector(handle).getPointer().getStringArray(0);
|
||||
assertArrayEquals(expResult, result);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of TessBaseAPIGetAvailableLanguagesAsVector method, of class
|
||||
* TessAPI1.
|
||||
*/
|
||||
@Test
|
||||
public void testTessBaseAPIGetAvailableLanguagesAsVector() {
|
||||
logger.info("TessBaseAPIGetAvailableLanguagesAsVector");
|
||||
TessAPI1.TessBaseAPIInit3(handle, datapath, language);
|
||||
String[] expResult = {"eng"};
|
||||
String[] result = TessAPI1.TessBaseAPIGetAvailableLanguagesAsVector(handle).getPointer().getStringArray(0);
|
||||
assertTrue(Arrays.asList(result).containsAll(Arrays.asList(expResult)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of TessBaseAPIGetHOCRText method, of class TessAPI1.
|
||||
*
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@Test
|
||||
public void testTessBaseAPIGetHOCRText() throws Exception {
|
||||
logger.info("TessBaseAPIGetHOCRText");
|
||||
File tiff = new File(testResourcesDataPath, "eurotext.tif");
|
||||
BufferedImage image = ImageIO.read(new FileInputStream(tiff)); // require jai-imageio lib to read TIFF
|
||||
ByteBuffer buf = ImageIOHelper.convertImageData(image);
|
||||
int bpp = image.getColorModel().getPixelSize();
|
||||
int bytespp = bpp / 8;
|
||||
int bytespl = (int) Math.ceil(image.getWidth() * bpp / 8.0);
|
||||
TessAPI1.TessBaseAPISetPageSegMode(handle, TessPageSegMode.PSM_AUTO);
|
||||
TessAPI1.TessBaseAPIInit3(handle, datapath, language);
|
||||
TessAPI1.TessBaseAPISetImage(handle, buf, image.getWidth(), image.getHeight(), bytespp, bytespl);
|
||||
int page_number = 0;
|
||||
Pointer utf8Text = TessAPI1.TessBaseAPIGetHOCRText(handle, page_number);
|
||||
String result = utf8Text.getString(0);
|
||||
TessAPI1.TessDeleteText(utf8Text);
|
||||
assertTrue(result.contains("<div class='ocr_page'"));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of TessBaseAPIAnalyseLayout method, of class TessAPI1.
|
||||
*
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@Test
|
||||
public void testTessBaseAPIAnalyseLayout() throws Exception {
|
||||
logger.info("TessBaseAPIAnalyseLayout");
|
||||
File image = new File(testResourcesDataPath, "eurotext.png");
|
||||
int expResult = 12; // number of lines in the test image
|
||||
Leptonica leptInstance = Leptonica.INSTANCE;
|
||||
Pix pix = leptInstance.pixRead(image.getPath());
|
||||
TessAPI1.TessBaseAPIInit3(handle, datapath, language);
|
||||
TessAPI1.TessBaseAPISetImage2(handle, pix);
|
||||
int pageIteratorLevel = TessPageIteratorLevel.RIL_TEXTLINE;
|
||||
logger.info("PageIteratorLevel: " + Utils.getConstantName(pageIteratorLevel, TessPageIteratorLevel.class));
|
||||
int i = 0;
|
||||
TessPageIterator pi = TessAPI1.TessBaseAPIAnalyseLayout(handle);
|
||||
|
||||
do {
|
||||
IntBuffer leftB = IntBuffer.allocate(1);
|
||||
IntBuffer topB = IntBuffer.allocate(1);
|
||||
IntBuffer rightB = IntBuffer.allocate(1);
|
||||
IntBuffer bottomB = IntBuffer.allocate(1);
|
||||
TessAPI1.TessPageIteratorBoundingBox(pi, pageIteratorLevel, leftB, topB, rightB, bottomB);
|
||||
int left = leftB.get();
|
||||
int top = topB.get();
|
||||
int right = rightB.get();
|
||||
int bottom = bottomB.get();
|
||||
logger.info(String.format("Box[%d]: x=%d, y=%d, w=%d, h=%d", i++, left, top, right - left, bottom - top));
|
||||
} while (TessAPI1.TessPageIteratorNext(pi, pageIteratorLevel) == TRUE);
|
||||
TessAPI1.TessPageIteratorDelete(pi);
|
||||
assertEquals(expResult, i);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of TessBaseAPIDetectOrientationScript method, of class TessAPI1.
|
||||
*
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@Test
|
||||
public void testTessBaseAPIDetectOrientationScript() throws Exception {
|
||||
logger.info("TessBaseAPIDetectOrientationScript");
|
||||
File image = new File(testResourcesDataPath, "eurotext.png");
|
||||
int expResult = TRUE;
|
||||
Pix pix = Leptonica1.pixRead(image.getPath());
|
||||
TessAPI1.TessBaseAPIInit3(handle, datapath, language);
|
||||
TessAPI1.TessBaseAPISetImage2(handle, pix);
|
||||
|
||||
IntBuffer orient_degB = IntBuffer.allocate(1);
|
||||
FloatBuffer orient_confB = FloatBuffer.allocate(1);
|
||||
PointerByReference script_nameB = new PointerByReference();
|
||||
FloatBuffer script_confB = FloatBuffer.allocate(1);
|
||||
|
||||
int result = TessAPI1.TessBaseAPIDetectOrientationScript(handle, orient_degB, orient_confB, script_nameB, script_confB);
|
||||
if (result == TRUE) {
|
||||
int orient_deg = orient_degB.get();
|
||||
float orient_conf = orient_confB.get();
|
||||
String script_name = script_nameB.getValue().getString(0);
|
||||
float script_conf = script_confB.get();
|
||||
logger.info(String.format("OrientationScript: orient_deg=%d, orient_conf=%f, script_name=%s, script_conf=%f", orient_deg, orient_conf, script_name, script_conf));
|
||||
}
|
||||
|
||||
PointerByReference pRef = new PointerByReference();
|
||||
pRef.setValue(pix.getPointer());
|
||||
Leptonica1.pixDestroy(pRef);
|
||||
assertEquals(expResult, result);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of Orientation and script detection (OSD).
|
||||
*
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@Test
|
||||
public void testOSD() throws Exception {
|
||||
logger.info("OSD");
|
||||
int expResult = TessPageSegMode.PSM_AUTO_OSD;
|
||||
IntBuffer orientation = IntBuffer.allocate(1);
|
||||
IntBuffer direction = IntBuffer.allocate(1);
|
||||
IntBuffer order = IntBuffer.allocate(1);
|
||||
FloatBuffer deskew_angle = FloatBuffer.allocate(1);
|
||||
File tiff = new File(testResourcesDataPath, "eurotext.tif");
|
||||
BufferedImage image = ImageIO.read(new FileInputStream(tiff)); // require jai-imageio lib to read TIFF
|
||||
ByteBuffer buf = ImageIOHelper.convertImageData(image);
|
||||
int bpp = image.getColorModel().getPixelSize();
|
||||
int bytespp = bpp / 8;
|
||||
int bytespl = (int) Math.ceil(image.getWidth() * bpp / 8.0);
|
||||
TessAPI1.TessBaseAPIInit3(handle, datapath, language);
|
||||
TessAPI1.TessBaseAPISetPageSegMode(handle, expResult);
|
||||
int actualResult = TessAPI1.TessBaseAPIGetPageSegMode(handle);
|
||||
logger.info("PSM: " + Utils.getConstantName(actualResult, TessPageSegMode.class));
|
||||
TessAPI1.TessBaseAPISetImage(handle, buf, image.getWidth(), image.getHeight(), bytespp, bytespl);
|
||||
int success = TessAPI1.TessBaseAPIRecognize(handle, null);
|
||||
if (success == 0) {
|
||||
TessAPI1.TessPageIterator pi = TessAPI1.TessBaseAPIAnalyseLayout(handle);
|
||||
TessAPI1.TessPageIteratorOrientation(pi, orientation, direction, order, deskew_angle);
|
||||
logger.info(String.format(
|
||||
"Orientation: %s\nWritingDirection: %s\nTextlineOrder: %s\nDeskew angle: %.4f\n",
|
||||
Utils.getConstantName(orientation.get(), TessOrientation.class),
|
||||
Utils.getConstantName(direction.get(), TessWritingDirection.class),
|
||||
Utils.getConstantName(order.get(), TessTextlineOrder.class),
|
||||
deskew_angle.get()));
|
||||
}
|
||||
assertEquals(expResult, actualResult);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of ResultIterator and PageIterator.
|
||||
*
|
||||
* @throws Exception
|
||||
*/
|
||||
@Test
|
||||
public void testResultIterator() throws Exception {
|
||||
logger.info("TessBaseAPIGetIterator");
|
||||
File tiff = new File(testResourcesDataPath, "eurotext.tif");
|
||||
BufferedImage image = ImageIO.read(new FileInputStream(tiff)); // require jai-imageio lib to read TIFF
|
||||
ByteBuffer buf = ImageIOHelper.convertImageData(image);
|
||||
int bpp = image.getColorModel().getPixelSize();
|
||||
int bytespp = bpp / 8;
|
||||
int bytespl = (int) Math.ceil(image.getWidth() * bpp / 8.0);
|
||||
TessAPI1.TessBaseAPIInit3(handle, datapath, language);
|
||||
TessAPI1.TessBaseAPISetPageSegMode(handle, TessPageSegMode.PSM_AUTO);
|
||||
TessAPI1.TessBaseAPISetImage(handle, buf, image.getWidth(), image.getHeight(), bytespp, bytespl);
|
||||
ETEXT_DESC monitor = new ETEXT_DESC();
|
||||
ITessAPI.TimeVal timeout = new ITessAPI.TimeVal();
|
||||
timeout.tv_sec = new NativeLong(0L); // time > 0 causes blank ouput
|
||||
monitor.end_time = timeout;
|
||||
ProgressMonitor pmo = new ProgressMonitor(monitor);
|
||||
pmo.start();
|
||||
TessAPI1.TessBaseAPIRecognize(handle, monitor);
|
||||
logger.info("Message: " + pmo.getMessage());
|
||||
TessResultIterator ri = TessAPI1.TessBaseAPIGetIterator(handle);
|
||||
TessPageIterator pi = TessAPI1.TessResultIteratorGetPageIterator(ri);
|
||||
TessAPI1.TessPageIteratorBegin(pi);
|
||||
logger.info("Bounding boxes:\nchar(s) left top right bottom confidence font-attributes");
|
||||
int level = TessPageIteratorLevel.RIL_WORD;
|
||||
|
||||
// int height = image.getHeight();
|
||||
do {
|
||||
Pointer ptr = TessAPI1.TessResultIteratorGetUTF8Text(ri, level);
|
||||
String word = ptr.getString(0);
|
||||
TessAPI1.TessDeleteText(ptr);
|
||||
float confidence = TessAPI1.TessResultIteratorConfidence(ri, level);
|
||||
IntBuffer leftB = IntBuffer.allocate(1);
|
||||
IntBuffer topB = IntBuffer.allocate(1);
|
||||
IntBuffer rightB = IntBuffer.allocate(1);
|
||||
IntBuffer bottomB = IntBuffer.allocate(1);
|
||||
TessAPI1.TessPageIteratorBoundingBox(pi, level, leftB, topB, rightB, bottomB);
|
||||
int left = leftB.get();
|
||||
int top = topB.get();
|
||||
int right = rightB.get();
|
||||
int bottom = bottomB.get();
|
||||
System.out.print(String.format("%s %d %d %d %d %f", word, left, top, right, bottom, confidence));
|
||||
// logger.info(String.format("%s %d %d %d %d", str, left, height - bottom, right, height - top)); //
|
||||
// training box coordinates
|
||||
|
||||
IntBuffer boldB = IntBuffer.allocate(1);
|
||||
IntBuffer italicB = IntBuffer.allocate(1);
|
||||
IntBuffer underlinedB = IntBuffer.allocate(1);
|
||||
IntBuffer monospaceB = IntBuffer.allocate(1);
|
||||
IntBuffer serifB = IntBuffer.allocate(1);
|
||||
IntBuffer smallcapsB = IntBuffer.allocate(1);
|
||||
IntBuffer pointSizeB = IntBuffer.allocate(1);
|
||||
IntBuffer fontIdB = IntBuffer.allocate(1);
|
||||
String fontName = TessAPI1.TessResultIteratorWordFontAttributes(ri, boldB, italicB, underlinedB,
|
||||
monospaceB, serifB, smallcapsB, pointSizeB, fontIdB);
|
||||
boolean bold = boldB.get() == TRUE;
|
||||
boolean italic = italicB.get() == TRUE;
|
||||
boolean underlined = underlinedB.get() == TRUE;
|
||||
boolean monospace = monospaceB.get() == TRUE;
|
||||
boolean serif = serifB.get() == TRUE;
|
||||
boolean smallcaps = smallcapsB.get() == TRUE;
|
||||
int pointSize = pointSizeB.get();
|
||||
int fontId = fontIdB.get();
|
||||
logger.info(String.format(" font: %s, size: %d, font id: %d, bold: %b,"
|
||||
+ " italic: %b, underlined: %b, monospace: %b, serif: %b, smallcap: %b", fontName, pointSize,
|
||||
fontId, bold, italic, underlined, monospace, serif, smallcaps));
|
||||
} while (TessAPI1.TessPageIteratorNext(pi, level) == TRUE);
|
||||
|
||||
assertTrue(true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of ChoiceIterator.
|
||||
*
|
||||
* @throws Exception
|
||||
*/
|
||||
@Test
|
||||
public void testChoiceIterator() throws Exception {
|
||||
logger.info("TessResultIteratorGetChoiceIterator");
|
||||
File tiff = new File(testResourcesDataPath, "eurotext.tif");
|
||||
BufferedImage image = ImageIO.read(new FileInputStream(tiff)); // require jai-imageio lib to read TIFF
|
||||
ByteBuffer buf = ImageIOHelper.convertImageData(image);
|
||||
int bpp = image.getColorModel().getPixelSize();
|
||||
int bytespp = bpp / 8;
|
||||
int bytespl = (int) Math.ceil(image.getWidth() * bpp / 8.0);
|
||||
TessAPI1.TessBaseAPIInit3(handle, datapath, language);
|
||||
TessAPI1.TessBaseAPISetImage(handle, buf, image.getWidth(), image.getHeight(), bytespp, bytespl);
|
||||
TessAPI1.TessBaseAPISetVariable(handle, "save_blob_choices", "T");
|
||||
TessAPI1.TessBaseAPISetRectangle(handle, 37, 228, 548, 31);
|
||||
ETEXT_DESC monitor = new ETEXT_DESC();
|
||||
ProgressMonitor pmo = new ProgressMonitor(monitor);
|
||||
pmo.start();
|
||||
TessAPI1.TessBaseAPIRecognize(handle, monitor);
|
||||
logger.info("Message: " + pmo.getMessage());
|
||||
TessResultIterator ri = TessAPI1.TessBaseAPIGetIterator(handle);
|
||||
int level = TessPageIteratorLevel.RIL_SYMBOL;
|
||||
|
||||
if (ri != null) {
|
||||
do {
|
||||
Pointer symbol = TessAPI1.TessResultIteratorGetUTF8Text(ri, level);
|
||||
float conf = TessAPI1.TessResultIteratorConfidence(ri, level);
|
||||
if (symbol != null) {
|
||||
logger.info(String.format("symbol %s, conf: %f", symbol.getString(0), conf));
|
||||
boolean indent = false;
|
||||
TessChoiceIterator ci = TessAPI1.TessResultIteratorGetChoiceIterator(ri);
|
||||
do {
|
||||
if (indent) {
|
||||
System.out.print("\t");
|
||||
}
|
||||
System.out.print("\t- ");
|
||||
String choice = TessAPI1.TessChoiceIteratorGetUTF8Text(ci);
|
||||
logger.info(String.format("%s conf: %f", choice, TessAPI1.TessChoiceIteratorConfidence(ci)));
|
||||
indent = true;
|
||||
} while (TessAPI1.TessChoiceIteratorNext(ci) == ITessAPI.TRUE);
|
||||
TessAPI1.TessChoiceIteratorDelete(ci);
|
||||
}
|
||||
logger.info("---------------------------------------------");
|
||||
TessAPI1.TessDeleteText(symbol);
|
||||
} while (TessAPI1.TessResultIteratorNext(ri, level) == ITessAPI.TRUE);
|
||||
}
|
||||
|
||||
assertTrue(true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of ResultRenderer method, of class TessAPI1.
|
||||
*
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@Test
|
||||
public void testResultRenderer() throws Exception {
|
||||
logger.info("TessResultRenderer");
|
||||
String image = String.format("%s/%s", testResourcesDataPath, "eurotext.tif");
|
||||
String output = "capi-test.txt";
|
||||
int set_only_init_params = ITessAPI.FALSE;
|
||||
int oem = TessOcrEngineMode.OEM_DEFAULT;
|
||||
PointerByReference configs = null;
|
||||
int configs_size = 0;
|
||||
|
||||
String[] params = {"load_system_dawg", "tessedit_char_whitelist"};
|
||||
String vals[] = {"F", ""}; //0123456789-.IThisalotfpnex
|
||||
PointerByReference vars_vec = new PointerByReference();
|
||||
vars_vec.setPointer(new StringArray(params));
|
||||
PointerByReference vars_values = new PointerByReference();
|
||||
vars_values.setPointer(new StringArray(vals));
|
||||
NativeSize vars_vec_size = new NativeSize(params.length);
|
||||
|
||||
TessAPI1.TessBaseAPISetOutputName(handle, output);
|
||||
|
||||
int rc = TessAPI1.TessBaseAPIInit4(handle, datapath, language,
|
||||
oem, configs, configs_size, vars_vec, vars_values, vars_vec_size, set_only_init_params);
|
||||
|
||||
if (rc != 0) {
|
||||
TessAPI1.TessBaseAPIDelete(handle);
|
||||
logger.error("Could not initialize tesseract.");
|
||||
return;
|
||||
}
|
||||
|
||||
String outputbase = "test/test-results/outputbase1";
|
||||
TessResultRenderer renderer = TessAPI1.TessHOcrRendererCreate(outputbase);
|
||||
TessAPI1.TessResultRendererInsert(renderer, TessAPI1.TessBoxTextRendererCreate(outputbase));
|
||||
TessAPI1.TessResultRendererInsert(renderer, TessAPI1.TessTextRendererCreate(outputbase));
|
||||
String dataPath = TessAPI1.TessBaseAPIGetDatapath(handle);
|
||||
TessAPI1.TessResultRendererInsert(renderer, TessAPI1.TessPDFRendererCreate(outputbase, dataPath));
|
||||
int result = TessAPI1.TessBaseAPIProcessPages(handle, image, null, 0, renderer);
|
||||
|
||||
// if (result == FALSE) {
|
||||
// logger.error("Error during processing.");
|
||||
// return;
|
||||
// }
|
||||
for (; renderer != null; renderer = TessAPI1.TessResultRendererNext(renderer)) {
|
||||
String ext = TessAPI1.TessResultRendererExtention(renderer).getString(0);
|
||||
logger.info(String.format("TessResultRendererExtention: %s\nTessResultRendererTitle: %s\nTessResultRendererImageNum: %d",
|
||||
ext,
|
||||
TessAPI1.TessResultRendererTitle(renderer).getString(0),
|
||||
TessAPI1.TessResultRendererImageNum(renderer)));
|
||||
}
|
||||
|
||||
TessAPI1.TessDeleteResultRenderer(renderer);
|
||||
assertTrue(new File(outputbase + ".pdf").exists());
|
||||
}
|
||||
}
|
625
NGCC/Tess4J/test/net/sourceforge/tess4j/TessAPIImpl.java
Normal file
625
NGCC/Tess4J/test/net/sourceforge/tess4j/TessAPIImpl.java
Normal file
@@ -0,0 +1,625 @@
|
||||
/*
|
||||
* Copyright @ 2017 Quan Nguyen
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package net.sourceforge.tess4j;
|
||||
|
||||
import com.ochafik.lang.jnaerator.runtime.NativeSize;
|
||||
import com.sun.jna.Pointer;
|
||||
import com.sun.jna.ptr.IntByReference;
|
||||
import com.sun.jna.ptr.PointerByReference;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.DoubleBuffer;
|
||||
import java.nio.FloatBuffer;
|
||||
import java.nio.IntBuffer;
|
||||
import net.sourceforge.lept4j.Boxa;
|
||||
import net.sourceforge.lept4j.Pix;
|
||||
|
||||
public class TessAPIImpl implements TessAPI {
|
||||
|
||||
public TessAPI getInstance() {
|
||||
return TessAPI.INSTANCE;
|
||||
}
|
||||
|
||||
public void TessAPIEndPage() {
|
||||
}
|
||||
|
||||
public void TessAPIRelease() {
|
||||
}
|
||||
|
||||
@Override
|
||||
public String TessVersion() {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void TessDeleteText(Pointer text) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void TessDeleteTextArray(PointerByReference arr) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void TessDeleteIntArray(IntBuffer arr) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public ITessAPI.TessResultRenderer TessTextRendererCreate(String outputbase) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public ITessAPI.TessResultRenderer TessHOcrRendererCreate(String outputbase) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public TessResultRenderer TessHOcrRendererCreate2(String outputbase, int font_info) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public ITessAPI.TessResultRenderer TessPDFRendererCreate(String outputbase, String datadir) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public TessResultRenderer TessPDFRendererCreateTextonly(String outputbase, String datadir, int textonly) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public ITessAPI.TessResultRenderer TessUnlvRendererCreate(String outputbase) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public ITessAPI.TessResultRenderer TessBoxTextRendererCreate(String outputbase) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void TessDeleteResultRenderer(ITessAPI.TessResultRenderer renderer) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void TessResultRendererInsert(ITessAPI.TessResultRenderer renderer, ITessAPI.TessResultRenderer next) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public ITessAPI.TessResultRenderer TessResultRendererNext(ITessAPI.TessResultRenderer renderer) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int TessResultRendererBeginDocument(ITessAPI.TessResultRenderer renderer, String title) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int TessResultRendererAddImage(ITessAPI.TessResultRenderer renderer, PointerByReference api) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int TessResultRendererEndDocument(ITessAPI.TessResultRenderer renderer) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Pointer TessResultRendererExtention(ITessAPI.TessResultRenderer renderer) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Pointer TessResultRendererTitle(ITessAPI.TessResultRenderer renderer) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int TessResultRendererImageNum(ITessAPI.TessResultRenderer renderer) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public ITessAPI.TessBaseAPI TessBaseAPICreate() {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void TessBaseAPIDelete(ITessAPI.TessBaseAPI handle) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void TessBaseAPISetInputName(ITessAPI.TessBaseAPI handle, String name) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public String TessBaseAPIGetInputName(ITessAPI.TessBaseAPI handle) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void TessBaseAPISetInputImage(ITessAPI.TessBaseAPI handle, Pix pix) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Pix TessBaseAPIGetInputImage(ITessAPI.TessBaseAPI handle) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int TessBaseAPIGetSourceYResolution(ITessAPI.TessBaseAPI handle) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public String TessBaseAPIGetDatapath(ITessAPI.TessBaseAPI handle) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void TessBaseAPISetOutputName(ITessAPI.TessBaseAPI handle, String name) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int TessBaseAPISetVariable(ITessAPI.TessBaseAPI handle, String name, String value) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int TessBaseAPIGetIntVariable(ITessAPI.TessBaseAPI handle, String name, IntBuffer value) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int TessBaseAPIGetBoolVariable(ITessAPI.TessBaseAPI handle, String name, IntBuffer value) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int TessBaseAPIGetDoubleVariable(ITessAPI.TessBaseAPI handle, String name, DoubleBuffer value) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public String TessBaseAPIGetStringVariable(ITessAPI.TessBaseAPI handle, String name) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void TessBaseAPIPrintVariablesToFile(ITessAPI.TessBaseAPI handle, String filename) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int TessBaseAPIInit1(ITessAPI.TessBaseAPI handle, String datapath, String language, int oem, PointerByReference configs, int configs_size) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int TessBaseAPIInit2(ITessAPI.TessBaseAPI handle, String datapath, String language, int oem) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int TessBaseAPIInit3(ITessAPI.TessBaseAPI handle, String datapath, String language) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int TessBaseAPIInit4(ITessAPI.TessBaseAPI handle, String datapath, String language, int oem, PointerByReference configs, int configs_size, PointerByReference vars_vec, PointerByReference vars_values, NativeSize vars_vec_size, int set_only_non_debug_params) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public String TessBaseAPIGetInitLanguagesAsString(ITessAPI.TessBaseAPI handle) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public PointerByReference TessBaseAPIGetLoadedLanguagesAsVector(ITessAPI.TessBaseAPI handle) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public PointerByReference TessBaseAPIGetAvailableLanguagesAsVector(ITessAPI.TessBaseAPI handle) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int TessBaseAPIInitLangMod(ITessAPI.TessBaseAPI handle, String datapath, String language) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void TessBaseAPIInitForAnalysePage(ITessAPI.TessBaseAPI handle) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void TessBaseAPIReadConfigFile(ITessAPI.TessBaseAPI handle, String filename, int init_only) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void TessBaseAPISetPageSegMode(ITessAPI.TessBaseAPI handle, int mode) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int TessBaseAPIGetPageSegMode(ITessAPI.TessBaseAPI handle) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Pointer TessBaseAPIRect(ITessAPI.TessBaseAPI handle, ByteBuffer imagedata, int bytes_per_pixel, int bytes_per_line, int left, int top, int width, int height) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void TessBaseAPIClearAdaptiveClassifier(ITessAPI.TessBaseAPI handle) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void TessBaseAPISetImage(ITessAPI.TessBaseAPI handle, ByteBuffer imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void TessBaseAPISetImage2(ITessAPI.TessBaseAPI handle, Pix pix) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void TessBaseAPISetSourceResolution(ITessAPI.TessBaseAPI handle, int ppi) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void TessBaseAPISetRectangle(ITessAPI.TessBaseAPI handle, int left, int top, int width, int height) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Pix TessBaseAPIGetThresholdedImage(ITessAPI.TessBaseAPI handle) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Boxa TessBaseAPIGetRegions(ITessAPI.TessBaseAPI handle, PointerByReference pixa) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Boxa TessBaseAPIGetTextlines(ITessAPI.TessBaseAPI handle, PointerByReference pixa, PointerByReference blockids) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Boxa TessBaseAPIGetTextlines1(ITessAPI.TessBaseAPI handle, int raw_image, int raw_padding, PointerByReference pixa, PointerByReference blockids, PointerByReference paraids) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Boxa TessBaseAPIGetStrips(ITessAPI.TessBaseAPI handle, PointerByReference pixa, PointerByReference blockids) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Boxa TessBaseAPIGetWords(ITessAPI.TessBaseAPI handle, PointerByReference pixa) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Boxa TessBaseAPIGetConnectedComponents(ITessAPI.TessBaseAPI handle, PointerByReference cc) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Boxa TessBaseAPIGetComponentImages(ITessAPI.TessBaseAPI handle, int level, int text_only, PointerByReference pixa, PointerByReference blockids) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Boxa TessBaseAPIGetComponentImages1(ITessAPI.TessBaseAPI handle, int level, int text_only, int raw_image, int raw_padding, PointerByReference pixa, PointerByReference blockids, PointerByReference paraids) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int TessBaseAPIGetThresholdedImageScaleFactor(ITessAPI.TessBaseAPI handle) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void TessBaseAPIDumpPGM(ITessAPI.TessBaseAPI handle, String filename) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public ITessAPI.TessPageIterator TessBaseAPIAnalyseLayout(ITessAPI.TessBaseAPI handle) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int TessBaseAPIRecognize(ITessAPI.TessBaseAPI handle, ITessAPI.ETEXT_DESC monitor) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int TessBaseAPIRecognizeForChopTest(ITessAPI.TessBaseAPI handle, ITessAPI.ETEXT_DESC monitor) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public ITessAPI.TessResultIterator TessBaseAPIGetIterator(ITessAPI.TessBaseAPI handle) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public ITessAPI.TessMutableIterator TessBaseAPIGetMutableIterator(ITessAPI.TessBaseAPI handle) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int TessBaseAPIProcessPages(ITessAPI.TessBaseAPI handle, String filename, String retry_config, int timeout_millisec, ITessAPI.TessResultRenderer renderer) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int TessBaseAPIProcessPage(ITessAPI.TessBaseAPI handle, Pix pix, int page_index, String filename, String retry_config, int timeout_millisec, ITessAPI.TessResultRenderer renderer) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Pointer TessBaseAPIGetUTF8Text(ITessAPI.TessBaseAPI handle) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Pointer TessBaseAPIGetHOCRText(ITessAPI.TessBaseAPI handle, int page_number) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Pointer TessBaseAPIGetBoxText(ITessAPI.TessBaseAPI handle, int page_number) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Pointer TessBaseAPIGetUNLVText(ITessAPI.TessBaseAPI handle) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int TessBaseAPIMeanTextConf(ITessAPI.TessBaseAPI handle) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public IntByReference TessBaseAPIAllWordConfidences(ITessAPI.TessBaseAPI handle) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int TessBaseAPIAdaptToWordStr(ITessAPI.TessBaseAPI handle, int mode, String wordstr) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void TessBaseAPIClear(ITessAPI.TessBaseAPI handle) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void TessBaseAPIEnd(ITessAPI.TessBaseAPI handle) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int TessBaseAPIIsValidWord(ITessAPI.TessBaseAPI handle, String word) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int TessBaseAPIGetTextDirection(ITessAPI.TessBaseAPI handle, IntBuffer out_offset, FloatBuffer out_slope) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void TessBaseAPIClearPersistentCache(ITessAPI.TessBaseAPI handle) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int TessBaseAPIDetectOrientationScript(TessBaseAPI handle, IntBuffer orient_deg, FloatBuffer orient_conf, PointerByReference script_name, FloatBuffer script_conf) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public String TessBaseAPIGetUnichar(ITessAPI.TessBaseAPI handle, int unichar_id) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void TessPageIteratorDelete(ITessAPI.TessPageIterator handle) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public ITessAPI.TessPageIterator TessPageIteratorCopy(ITessAPI.TessPageIterator handle) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void TessPageIteratorBegin(ITessAPI.TessPageIterator handle) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int TessPageIteratorNext(ITessAPI.TessPageIterator handle, int level) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int TessPageIteratorIsAtBeginningOf(ITessAPI.TessPageIterator handle, int level) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int TessPageIteratorIsAtFinalElement(ITessAPI.TessPageIterator handle, int level, int element) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int TessPageIteratorBoundingBox(ITessAPI.TessPageIterator handle, int level, IntBuffer left, IntBuffer top, IntBuffer right, IntBuffer bottom) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int TessPageIteratorBlockType(ITessAPI.TessPageIterator handle) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Pix TessPageIteratorGetBinaryImage(ITessAPI.TessPageIterator handle, int level) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Pix TessPageIteratorGetImage(ITessAPI.TessPageIterator handle, int level, int padding, Pix original_image, IntBuffer left, IntBuffer top) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int TessPageIteratorBaseline(ITessAPI.TessPageIterator handle, int level, IntBuffer x1, IntBuffer y1, IntBuffer x2, IntBuffer y2) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void TessPageIteratorOrientation(ITessAPI.TessPageIterator handle, IntBuffer orientation, IntBuffer writing_direction, IntBuffer textline_order, FloatBuffer deskew_angle) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void TessPageIteratorParagraphInfo(ITessAPI.TessPageIterator handle, IntBuffer justification, IntBuffer is_list_item, IntBuffer is_crown, IntBuffer first_line_indent) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void TessResultIteratorDelete(ITessAPI.TessResultIterator handle) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public ITessAPI.TessResultIterator TessResultIteratorCopy(ITessAPI.TessResultIterator handle) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public ITessAPI.TessPageIterator TessResultIteratorGetPageIterator(ITessAPI.TessResultIterator handle) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public ITessAPI.TessPageIterator TessResultIteratorGetPageIteratorConst(ITessAPI.TessResultIterator handle) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int TessResultIteratorNext(ITessAPI.TessResultIterator handle, int level) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Pointer TessResultIteratorGetUTF8Text(ITessAPI.TessResultIterator handle, int level) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public float TessResultIteratorConfidence(ITessAPI.TessResultIterator handle, int level) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public String TessResultIteratorWordRecognitionLanguage(ITessAPI.TessResultIterator handle) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public String TessResultIteratorWordFontAttributes(ITessAPI.TessResultIterator handle, IntBuffer is_bold, IntBuffer is_italic, IntBuffer is_underlined, IntBuffer is_monospace, IntBuffer is_serif, IntBuffer is_smallcaps, IntBuffer pointsize, IntBuffer font_id) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int TessResultIteratorWordIsFromDictionary(ITessAPI.TessResultIterator handle) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int TessResultIteratorWordIsNumeric(ITessAPI.TessResultIterator handle) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int TessResultIteratorSymbolIsSuperscript(ITessAPI.TessResultIterator handle) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int TessResultIteratorSymbolIsSubscript(ITessAPI.TessResultIterator handle) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int TessResultIteratorSymbolIsDropcap(ITessAPI.TessResultIterator handle) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public ITessAPI.TessChoiceIterator TessResultIteratorGetChoiceIterator(ITessAPI.TessResultIterator handle) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void TessChoiceIteratorDelete(ITessAPI.TessChoiceIterator handle) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int TessChoiceIteratorNext(ITessAPI.TessChoiceIterator handle) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public String TessChoiceIteratorGetUTF8Text(ITessAPI.TessChoiceIterator handle) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public float TessChoiceIteratorConfidence(ITessAPI.TessChoiceIterator handle) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
}
|
648
NGCC/Tess4J/test/net/sourceforge/tess4j/TessAPITest.java
Normal file
648
NGCC/Tess4J/test/net/sourceforge/tess4j/TessAPITest.java
Normal file
@@ -0,0 +1,648 @@
|
||||
/**
|
||||
* Copyright @ 2012 Quan Nguyen
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||
* use this file except in compliance with the License. You may obtain a copy of
|
||||
* the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations under
|
||||
* the License.
|
||||
*/
|
||||
package net.sourceforge.tess4j;
|
||||
|
||||
import static org.junit.Assert.assertArrayEquals;
|
||||
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.io.BufferedReader;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileReader;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.FloatBuffer;
|
||||
import java.nio.IntBuffer;
|
||||
import java.util.Arrays;
|
||||
|
||||
import javax.imageio.ImageIO;
|
||||
|
||||
import net.sourceforge.tess4j.util.ImageIOHelper;
|
||||
import net.sourceforge.tess4j.util.LoggHelper;
|
||||
import net.sourceforge.tess4j.util.Utils;
|
||||
|
||||
import org.junit.After;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.Before;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
import com.ochafik.lang.jnaerator.runtime.NativeSize;
|
||||
import com.sun.jna.NativeLong;
|
||||
import com.sun.jna.Pointer;
|
||||
import com.sun.jna.StringArray;
|
||||
import com.sun.jna.ptr.PointerByReference;
|
||||
import net.sourceforge.lept4j.Box;
|
||||
import net.sourceforge.lept4j.Boxa;
|
||||
import static net.sourceforge.lept4j.ILeptonica.L_CLONE;
|
||||
import net.sourceforge.lept4j.Leptonica;
|
||||
import net.sourceforge.lept4j.Pix;
|
||||
import net.sourceforge.lept4j.util.LeptUtils;
|
||||
|
||||
import net.sourceforge.tess4j.ITessAPI.*;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static net.sourceforge.tess4j.ITessAPI.FALSE;
|
||||
import static net.sourceforge.tess4j.ITessAPI.TRUE;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
public class TessAPITest {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(new LoggHelper().toString());
|
||||
private final String datapath = ".";
|
||||
private final String testResourcesDataPath = "test/resources/test-data";
|
||||
String language = "eng";
|
||||
String expOCRResult = "The (quick) [brown] {fox} jumps!\nOver the $43,456.78 <lazy> #90 dog";
|
||||
|
||||
TessAPI api;
|
||||
TessBaseAPI handle;
|
||||
|
||||
@BeforeClass
|
||||
public static void setUpClass() throws Exception {
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
public static void tearDownClass() throws Exception {
|
||||
}
|
||||
|
||||
@Before
|
||||
public void setUp() {
|
||||
api = new TessAPIImpl().getInstance();
|
||||
handle = api.TessBaseAPICreate();
|
||||
}
|
||||
|
||||
@After
|
||||
public void tearDown() {
|
||||
api.TessBaseAPIDelete(handle);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of TessBaseAPIRect method, of class TessAPI.
|
||||
*
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@Test
|
||||
public void testTessBaseAPIRect() throws Exception {
|
||||
logger.info("TessBaseAPIRect");
|
||||
String expResult = expOCRResult;
|
||||
File tiff = new File(testResourcesDataPath, "eurotext.tif");
|
||||
BufferedImage image = ImageIO.read(tiff); // require jai-imageio lib to read TIFF
|
||||
ByteBuffer buf = ImageIOHelper.convertImageData(image);
|
||||
int bpp = image.getColorModel().getPixelSize();
|
||||
int bytespp = bpp / 8;
|
||||
int bytespl = (int) Math.ceil(image.getWidth() * bpp / 8.0);
|
||||
api.TessBaseAPIInit3(handle, datapath, language);
|
||||
api.TessBaseAPISetPageSegMode(handle, TessPageSegMode.PSM_AUTO);
|
||||
Pointer utf8Text = api.TessBaseAPIRect(handle, buf, bytespp, bytespl, 90, 50, 862, 614);
|
||||
String result = utf8Text.getString(0);
|
||||
api.TessDeleteText(utf8Text);
|
||||
logger.info(result);
|
||||
assertTrue(result.startsWith(expResult));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of TessBaseAPIGetUTF8Text method, of class TessAPI.
|
||||
*
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@Test
|
||||
public void testTessBaseAPIGetUTF8Text() throws Exception {
|
||||
logger.info("TessBaseAPIGetUTF8Text");
|
||||
String expResult = expOCRResult;
|
||||
File tiff = new File(testResourcesDataPath, "eurotext.tif");
|
||||
BufferedImage image = ImageIO.read(new FileInputStream(tiff)); // require jai-imageio lib to read TIFF
|
||||
ByteBuffer buf = ImageIOHelper.convertImageData(image);
|
||||
int bpp = image.getColorModel().getPixelSize();
|
||||
int bytespp = bpp / 8;
|
||||
int bytespl = (int) Math.ceil(image.getWidth() * bpp / 8.0);
|
||||
api.TessBaseAPIInit3(handle, datapath, language);
|
||||
api.TessBaseAPISetPageSegMode(handle, TessPageSegMode.PSM_AUTO);
|
||||
api.TessBaseAPISetImage(handle, buf, image.getWidth(), image.getHeight(), bytespp, bytespl);
|
||||
api.TessBaseAPISetRectangle(handle, 90, 50, 862, 614);
|
||||
Pointer utf8Text = api.TessBaseAPIGetUTF8Text(handle);
|
||||
String result = utf8Text.getString(0);
|
||||
api.TessDeleteText(utf8Text);
|
||||
logger.info(result);
|
||||
assertTrue(result.startsWith(expResult));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of TessBaseAPIGetUTF8Text method, of class TessAPI.
|
||||
*
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@Test
|
||||
public void testTessBaseAPIGetUTF8Text_Pix() throws Exception {
|
||||
logger.info("TessBaseAPIGetUTF8Text_Pix");
|
||||
String expResult = expOCRResult;
|
||||
File tiff = new File(testResourcesDataPath, "eurotext.tif");
|
||||
Leptonica leptInstance = Leptonica.INSTANCE;
|
||||
Pix pix = leptInstance.pixRead(tiff.getPath());
|
||||
api.TessBaseAPIInit3(handle, datapath, language);
|
||||
api.TessBaseAPISetImage2(handle, pix);
|
||||
Pointer utf8Text = api.TessBaseAPIGetUTF8Text(handle);
|
||||
String result = utf8Text.getString(0);
|
||||
api.TessDeleteText(utf8Text);
|
||||
logger.info(result);
|
||||
|
||||
//release Pix resource
|
||||
PointerByReference pRef = new PointerByReference();
|
||||
pRef.setValue(pix.getPointer());
|
||||
leptInstance.pixDestroy(pRef);
|
||||
|
||||
assertTrue(result.startsWith(expResult));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of TessBaseAPIGetComponentImages method, of class TessAPI.
|
||||
*
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@Test
|
||||
public void testTessBaseAPIGetComponentImages() throws Exception {
|
||||
logger.info("TessBaseAPIGetComponentImages");
|
||||
File image = new File(testResourcesDataPath, "eurotext.png");
|
||||
int expResult = 12; // number of lines in the test image
|
||||
Leptonica leptInstance = Leptonica.INSTANCE;
|
||||
Pix pix = leptInstance.pixRead(image.getPath());
|
||||
api.TessBaseAPIInit3(handle, datapath, language);
|
||||
api.TessBaseAPISetImage2(handle, pix);
|
||||
PointerByReference pixa = null;
|
||||
PointerByReference blockids = null;
|
||||
Boxa boxes = api.TessBaseAPIGetComponentImages(handle, TessPageIteratorLevel.RIL_TEXTLINE, TRUE, pixa, blockids);
|
||||
// boxes = api.TessBaseAPIGetRegions(handle, pixa); // equivalent to TessPageIteratorLevel.RIL_BLOCK
|
||||
int boxCount = leptInstance.boxaGetCount(boxes);
|
||||
for (int i = 0; i < boxCount; i++) {
|
||||
Box box = leptInstance.boxaGetBox(boxes, i, L_CLONE);
|
||||
if (box == null) {
|
||||
continue;
|
||||
}
|
||||
api.TessBaseAPISetRectangle(handle, box.x, box.y, box.w, box.h);
|
||||
Pointer utf8Text = api.TessBaseAPIGetUTF8Text(handle);
|
||||
String ocrResult = utf8Text.getString(0);
|
||||
api.TessDeleteText(utf8Text);
|
||||
int conf = api.TessBaseAPIMeanTextConf(handle);
|
||||
System.out.print(String.format("Box[%d]: x=%d, y=%d, w=%d, h=%d, confidence: %d, text: %s", i, box.x, box.y, box.w, box.h, conf, ocrResult));
|
||||
LeptUtils.dispose(box);
|
||||
}
|
||||
|
||||
// release Pix and Boxa resources
|
||||
LeptUtils.dispose(pix);
|
||||
LeptUtils.dispose(boxes);
|
||||
|
||||
assertEquals(expResult, boxCount);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of TessVersion method, of class TessAPI.
|
||||
*/
|
||||
@Test
|
||||
public void testTessVersion() {
|
||||
logger.info("TessVersion");
|
||||
String expResult = "3.05.01";
|
||||
String result = api.TessVersion();
|
||||
logger.info(result);
|
||||
assertTrue(result.startsWith(expResult));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of TessBaseAPIGetBoolVariable method, of class TessAPI.
|
||||
*/
|
||||
@Test
|
||||
public void testTessBaseAPIGetBoolVariable() {
|
||||
logger.info("TessBaseAPIGetBoolVariable");
|
||||
String name = "tessedit_create_hocr";
|
||||
api.TessBaseAPISetVariable(handle, name, "1");
|
||||
IntBuffer value = IntBuffer.allocate(1);
|
||||
int result = -1;
|
||||
if (api.TessBaseAPIGetBoolVariable(handle, "tessedit_create_hocr", value) == TRUE) {
|
||||
result = value.get(0);
|
||||
}
|
||||
int expResult = 1;
|
||||
assertEquals(expResult, result);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of TessBaseAPIPrintVariables method, of class TessAPI.
|
||||
*
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@Test
|
||||
public void testTessBaseAPIPrintVariablesToFile() throws Exception {
|
||||
logger.info("TessBaseAPIPrintVariablesToFile");
|
||||
String var = "tessedit_char_whitelist";
|
||||
String value = "0123456789";
|
||||
api.TessBaseAPISetVariable(handle, var, value);
|
||||
String filename = "printvar.txt";
|
||||
api.TessBaseAPIPrintVariablesToFile(handle, filename); // will crash if not invoked after some method
|
||||
File file = new File(filename);
|
||||
BufferedReader input = new BufferedReader(new FileReader(file));
|
||||
StringBuilder strB = new StringBuilder();
|
||||
String line;
|
||||
String EOL = System.getProperty("line.separator");
|
||||
while ((line = input.readLine()) != null) {
|
||||
strB.append(line).append(EOL);
|
||||
}
|
||||
input.close();
|
||||
file.delete();
|
||||
assertTrue(strB.toString().contains(var + "\t" + value));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of TessBaseAPIInit4 method, of class TessAPI.
|
||||
*/
|
||||
@Test
|
||||
public void testTessBaseAPIInit4() {
|
||||
logger.info("TessBaseAPIInit4");
|
||||
int oem = TessOcrEngineMode.OEM_DEFAULT;
|
||||
PointerByReference configs = null; //new PointerByReference();
|
||||
int configs_size = 0;
|
||||
|
||||
// disable loading dictionaries
|
||||
String[] args = new String[]{"load_system_dawg", "load_freq_dawg"};
|
||||
StringArray sarray = new StringArray(args);
|
||||
PointerByReference vars_vec = new PointerByReference();
|
||||
vars_vec.setPointer(sarray);
|
||||
|
||||
args = new String[]{"F", "F"};
|
||||
sarray = new StringArray(args);
|
||||
PointerByReference vars_values = new PointerByReference();
|
||||
vars_values.setPointer(sarray);
|
||||
|
||||
NativeSize vars_vec_size = new NativeSize(args.length);
|
||||
|
||||
int expResult = 0;
|
||||
int result = api.TessBaseAPIInit4(handle, datapath, language, oem, configs, configs_size, vars_vec, vars_values, vars_vec_size, FALSE);
|
||||
assertEquals(expResult, result);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of TessBaseAPIGetInitLanguagesAsString method, of class TessAPI.
|
||||
*/
|
||||
@Test
|
||||
public void testTessBaseAPIGetInitLanguagesAsString() {
|
||||
logger.info("TessBaseAPIGetInitLanguagesAsString");
|
||||
String expResult = "";
|
||||
String result = api.TessBaseAPIGetInitLanguagesAsString(handle);
|
||||
assertEquals(expResult, result);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of TessBaseAPIGetLoadedLanguagesAsVector method, of class TessAPI.
|
||||
*/
|
||||
@Test
|
||||
public void testTessBaseAPIGetLoadedLanguagesAsVector() {
|
||||
logger.info("TessBaseAPIGetLoadedLanguagesAsVector");
|
||||
api.TessBaseAPIInit3(handle, datapath, language);
|
||||
String[] expResult = {"eng"};
|
||||
String[] result = api.TessBaseAPIGetLoadedLanguagesAsVector(handle).getPointer().getStringArray(0);
|
||||
assertArrayEquals(expResult, result);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of TessBaseAPIGetAvailableLanguagesAsVector method, of class
|
||||
* TessAPI.
|
||||
*/
|
||||
@Test
|
||||
public void testTessBaseAPIGetAvailableLanguagesAsVector() {
|
||||
logger.info("TessBaseAPIGetAvailableLanguagesAsVector");
|
||||
api.TessBaseAPIInit3(handle, datapath, language);
|
||||
String[] expResult = {"eng"};
|
||||
String[] result = api.TessBaseAPIGetAvailableLanguagesAsVector(handle).getPointer().getStringArray(0);
|
||||
assertTrue(Arrays.asList(result).containsAll(Arrays.asList(expResult)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of TessBaseAPIGetHOCRText method, of class TessAPI.
|
||||
*
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@Test
|
||||
public void testTessBaseAPIGetHOCRText() throws Exception {
|
||||
logger.info("TessBaseAPIGetHOCRText");
|
||||
File tiff = new File(testResourcesDataPath, "eurotext.tif");
|
||||
BufferedImage image = ImageIO.read(new FileInputStream(tiff)); // require jai-imageio lib to read TIFF
|
||||
ByteBuffer buf = ImageIOHelper.convertImageData(image);
|
||||
int bpp = image.getColorModel().getPixelSize();
|
||||
int bytespp = bpp / 8;
|
||||
int bytespl = (int) Math.ceil(image.getWidth() * bpp / 8.0);
|
||||
api.TessBaseAPISetPageSegMode(handle, TessPageSegMode.PSM_AUTO);
|
||||
api.TessBaseAPIInit3(handle, datapath, language);
|
||||
api.TessBaseAPISetImage(handle, buf, image.getWidth(), image.getHeight(), bytespp, bytespl);
|
||||
int page_number = 0;
|
||||
Pointer utf8Text = api.TessBaseAPIGetHOCRText(handle, page_number);
|
||||
String result = utf8Text.getString(0);
|
||||
api.TessDeleteText(utf8Text);
|
||||
assertTrue(result.contains("<div class='ocr_page'"));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of TessBaseAPIAnalyseLayout method, of class TessAPI.
|
||||
*
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@Test
|
||||
public void testTessBaseAPIAnalyseLayout() throws Exception {
|
||||
logger.info("TessBaseAPIAnalyseLayout");
|
||||
File image = new File(testResourcesDataPath, "eurotext.png");
|
||||
int expResult = 12; // number of lines in the test image
|
||||
Leptonica leptInstance = Leptonica.INSTANCE;
|
||||
Pix pix = leptInstance.pixRead(image.getPath());
|
||||
api.TessBaseAPIInit3(handle, datapath, language);
|
||||
api.TessBaseAPISetImage2(handle, pix);
|
||||
int pageIteratorLevel = TessPageIteratorLevel.RIL_TEXTLINE;
|
||||
logger.info("PageIteratorLevel: " + Utils.getConstantName(pageIteratorLevel, TessPageIteratorLevel.class));
|
||||
int i = 0;
|
||||
TessPageIterator pi = api.TessBaseAPIAnalyseLayout(handle);
|
||||
|
||||
do {
|
||||
IntBuffer leftB = IntBuffer.allocate(1);
|
||||
IntBuffer topB = IntBuffer.allocate(1);
|
||||
IntBuffer rightB = IntBuffer.allocate(1);
|
||||
IntBuffer bottomB = IntBuffer.allocate(1);
|
||||
api.TessPageIteratorBoundingBox(pi, pageIteratorLevel, leftB, topB, rightB, bottomB);
|
||||
int left = leftB.get();
|
||||
int top = topB.get();
|
||||
int right = rightB.get();
|
||||
int bottom = bottomB.get();
|
||||
logger.info(String.format("Box[%d]: x=%d, y=%d, w=%d, h=%d", i++, left, top, right - left, bottom - top));
|
||||
} while (api.TessPageIteratorNext(pi, pageIteratorLevel) == TRUE);
|
||||
api.TessPageIteratorDelete(pi);
|
||||
assertEquals(expResult, i);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of TessBaseAPIDetectOrientationScript method, of class TessAPI.
|
||||
*
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@Test
|
||||
public void testTessBaseAPIDetectOrientationScript() throws Exception {
|
||||
logger.info("TessBaseAPIDetectOrientationScript");
|
||||
File image = new File(testResourcesDataPath, "eurotext.png");
|
||||
int expResult = TRUE;
|
||||
Leptonica leptInstance = Leptonica.INSTANCE;
|
||||
Pix pix = leptInstance.pixRead(image.getPath());
|
||||
api.TessBaseAPIInit3(handle, datapath, language);
|
||||
api.TessBaseAPISetImage2(handle, pix);
|
||||
|
||||
IntBuffer orient_degB = IntBuffer.allocate(1);
|
||||
FloatBuffer orient_confB = FloatBuffer.allocate(1);
|
||||
PointerByReference script_nameB = new PointerByReference();
|
||||
FloatBuffer script_confB = FloatBuffer.allocate(1);
|
||||
|
||||
int result = api.TessBaseAPIDetectOrientationScript(handle, orient_degB, orient_confB, script_nameB, script_confB);
|
||||
if (result == TRUE) {
|
||||
int orient_deg = orient_degB.get();
|
||||
float orient_conf = orient_confB.get();
|
||||
String script_name = script_nameB.getValue().getString(0);
|
||||
float script_conf = script_confB.get();
|
||||
logger.info(String.format("OrientationScript: orient_deg=%d, orient_conf=%f, script_name=%s, script_conf=%f", orient_deg, orient_conf, script_name, script_conf));
|
||||
}
|
||||
|
||||
PointerByReference pRef = new PointerByReference();
|
||||
pRef.setValue(pix.getPointer());
|
||||
leptInstance.pixDestroy(pRef);
|
||||
assertEquals(expResult, result);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of Orientation and script detection (OSD).
|
||||
*
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@Test
|
||||
public void testOSD() throws Exception {
|
||||
logger.info("OSD");
|
||||
int expResult = TessPageSegMode.PSM_AUTO_OSD;
|
||||
IntBuffer orientation = IntBuffer.allocate(1);
|
||||
IntBuffer direction = IntBuffer.allocate(1);
|
||||
IntBuffer order = IntBuffer.allocate(1);
|
||||
FloatBuffer deskew_angle = FloatBuffer.allocate(1);
|
||||
File tiff = new File(testResourcesDataPath, "eurotext.tif");
|
||||
BufferedImage image = ImageIO.read(new FileInputStream(tiff)); // require jai-imageio lib to read TIFF
|
||||
ByteBuffer buf = ImageIOHelper.convertImageData(image);
|
||||
int bpp = image.getColorModel().getPixelSize();
|
||||
int bytespp = bpp / 8;
|
||||
int bytespl = (int) Math.ceil(image.getWidth() * bpp / 8.0);
|
||||
api.TessBaseAPIInit3(handle, datapath, language);
|
||||
api.TessBaseAPISetPageSegMode(handle, TessPageSegMode.PSM_AUTO_OSD);
|
||||
int actualResult = api.TessBaseAPIGetPageSegMode(handle);
|
||||
logger.info("PSM: " + Utils.getConstantName(actualResult, TessPageSegMode.class));
|
||||
api.TessBaseAPISetImage(handle, buf, image.getWidth(), image.getHeight(), bytespp, bytespl);
|
||||
int success = api.TessBaseAPIRecognize(handle, null);
|
||||
if (success == 0) {
|
||||
TessPageIterator pi = api.TessBaseAPIAnalyseLayout(handle);
|
||||
api.TessPageIteratorOrientation(pi, orientation, direction, order, deskew_angle);
|
||||
logger.info(String.format(
|
||||
"Orientation: %s\nWritingDirection: %s\nTextlineOrder: %s\nDeskew angle: %.4f\n",
|
||||
Utils.getConstantName(orientation.get(), TessOrientation.class),
|
||||
Utils.getConstantName(direction.get(), TessWritingDirection.class),
|
||||
Utils.getConstantName(order.get(), TessTextlineOrder.class),
|
||||
deskew_angle.get()));
|
||||
}
|
||||
assertEquals(expResult, actualResult);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of ResultIterator and PageIterator.
|
||||
*
|
||||
* @throws Exception
|
||||
*/
|
||||
@Test
|
||||
public void testResultIterator() throws Exception {
|
||||
logger.info("TessBaseAPIGetIterator");
|
||||
File tiff = new File(testResourcesDataPath, "eurotext.tif");
|
||||
BufferedImage image = ImageIO.read(new FileInputStream(tiff)); // require jai-imageio lib to read TIFF
|
||||
ByteBuffer buf = ImageIOHelper.convertImageData(image);
|
||||
int bpp = image.getColorModel().getPixelSize();
|
||||
int bytespp = bpp / 8;
|
||||
int bytespl = (int) Math.ceil(image.getWidth() * bpp / 8.0);
|
||||
api.TessBaseAPIInit3(handle, datapath, language);
|
||||
api.TessBaseAPISetPageSegMode(handle, TessPageSegMode.PSM_AUTO);
|
||||
api.TessBaseAPISetImage(handle, buf, image.getWidth(), image.getHeight(), bytespp, bytespl);
|
||||
ETEXT_DESC monitor = new ETEXT_DESC();
|
||||
TimeVal timeout = new TimeVal();
|
||||
timeout.tv_sec = new NativeLong(0L); // time > 0 causes blank ouput
|
||||
monitor.end_time = timeout;
|
||||
ProgressMonitor pmo = new ProgressMonitor(monitor);
|
||||
pmo.start();
|
||||
api.TessBaseAPIRecognize(handle, monitor);
|
||||
logger.info("Message: " + pmo.getMessage());
|
||||
TessResultIterator ri = api.TessBaseAPIGetIterator(handle);
|
||||
TessPageIterator pi = api.TessResultIteratorGetPageIterator(ri);
|
||||
api.TessPageIteratorBegin(pi);
|
||||
logger.info("Bounding boxes:\nchar(s) left top right bottom confidence font-attributes");
|
||||
int level = TessPageIteratorLevel.RIL_WORD;
|
||||
|
||||
// int height = image.getHeight();
|
||||
do {
|
||||
Pointer ptr = api.TessResultIteratorGetUTF8Text(ri, level);
|
||||
String word = ptr.getString(0);
|
||||
api.TessDeleteText(ptr);
|
||||
float confidence = api.TessResultIteratorConfidence(ri, level);
|
||||
IntBuffer leftB = IntBuffer.allocate(1);
|
||||
IntBuffer topB = IntBuffer.allocate(1);
|
||||
IntBuffer rightB = IntBuffer.allocate(1);
|
||||
IntBuffer bottomB = IntBuffer.allocate(1);
|
||||
api.TessPageIteratorBoundingBox(pi, level, leftB, topB, rightB, bottomB);
|
||||
int left = leftB.get();
|
||||
int top = topB.get();
|
||||
int right = rightB.get();
|
||||
int bottom = bottomB.get();
|
||||
System.out.print(String.format("%s %d %d %d %d %f", word, left, top, right, bottom, confidence));
|
||||
// logger.info(String.format("%s %d %d %d %d", str, left, height - bottom, right, height - top)); //
|
||||
// training box coordinates
|
||||
|
||||
IntBuffer boldB = IntBuffer.allocate(1);
|
||||
IntBuffer italicB = IntBuffer.allocate(1);
|
||||
IntBuffer underlinedB = IntBuffer.allocate(1);
|
||||
IntBuffer monospaceB = IntBuffer.allocate(1);
|
||||
IntBuffer serifB = IntBuffer.allocate(1);
|
||||
IntBuffer smallcapsB = IntBuffer.allocate(1);
|
||||
IntBuffer pointSizeB = IntBuffer.allocate(1);
|
||||
IntBuffer fontIdB = IntBuffer.allocate(1);
|
||||
String fontName = api.TessResultIteratorWordFontAttributes(ri, boldB, italicB, underlinedB, monospaceB,
|
||||
serifB, smallcapsB, pointSizeB, fontIdB);
|
||||
boolean bold = boldB.get() == TRUE;
|
||||
boolean italic = italicB.get() == TRUE;
|
||||
boolean underlined = underlinedB.get() == TRUE;
|
||||
boolean monospace = monospaceB.get() == TRUE;
|
||||
boolean serif = serifB.get() == TRUE;
|
||||
boolean smallcaps = smallcapsB.get() == TRUE;
|
||||
int pointSize = pointSizeB.get();
|
||||
int fontId = fontIdB.get();
|
||||
logger.info(String.format(" font: %s, size: %d, font id: %d, bold: %b,"
|
||||
+ " italic: %b, underlined: %b, monospace: %b, serif: %b, smallcap: %b", fontName, pointSize,
|
||||
fontId, bold, italic, underlined, monospace, serif, smallcaps));
|
||||
} while (api.TessPageIteratorNext(pi, level) == TRUE);
|
||||
|
||||
assertTrue(true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of ChoiceIterator.
|
||||
*
|
||||
* @throws Exception
|
||||
*/
|
||||
@Test
|
||||
public void testChoiceIterator() throws Exception {
|
||||
logger.info("TessResultIteratorGetChoiceIterator");
|
||||
File tiff = new File(testResourcesDataPath, "eurotext.tif");
|
||||
BufferedImage image = ImageIO.read(new FileInputStream(tiff)); // require jai-imageio lib to read TIFF
|
||||
ByteBuffer buf = ImageIOHelper.convertImageData(image);
|
||||
int bpp = image.getColorModel().getPixelSize();
|
||||
int bytespp = bpp / 8;
|
||||
int bytespl = (int) Math.ceil(image.getWidth() * bpp / 8.0);
|
||||
api.TessBaseAPIInit3(handle, datapath, language);
|
||||
api.TessBaseAPISetImage(handle, buf, image.getWidth(), image.getHeight(), bytespp, bytespl);
|
||||
api.TessBaseAPISetVariable(handle, "save_blob_choices", "T");
|
||||
api.TessBaseAPISetRectangle(handle, 37, 228, 548, 31);
|
||||
ETEXT_DESC monitor = new ETEXT_DESC();
|
||||
ProgressMonitor pmo = new ProgressMonitor(monitor);
|
||||
pmo.start();
|
||||
api.TessBaseAPIRecognize(handle, monitor);
|
||||
logger.info("Message: " + pmo.getMessage());
|
||||
TessResultIterator ri = api.TessBaseAPIGetIterator(handle);
|
||||
int level = TessPageIteratorLevel.RIL_SYMBOL;
|
||||
|
||||
if (ri != null) {
|
||||
do {
|
||||
Pointer symbol = api.TessResultIteratorGetUTF8Text(ri, level);
|
||||
float conf = api.TessResultIteratorConfidence(ri, level);
|
||||
if (symbol != null) {
|
||||
logger.info(String.format("symbol %s, conf: %f", symbol.getString(0), conf));
|
||||
boolean indent = false;
|
||||
TessChoiceIterator ci = api.TessResultIteratorGetChoiceIterator(ri);
|
||||
do {
|
||||
if (indent) {
|
||||
System.out.print("\t");
|
||||
}
|
||||
System.out.print("\t- ");
|
||||
String choice = api.TessChoiceIteratorGetUTF8Text(ci);
|
||||
logger.info(String.format("%s conf: %f", choice, api.TessChoiceIteratorConfidence(ci)));
|
||||
indent = true;
|
||||
} while (api.TessChoiceIteratorNext(ci) == ITessAPI.TRUE);
|
||||
api.TessChoiceIteratorDelete(ci);
|
||||
}
|
||||
logger.info("---------------------------------------------");
|
||||
api.TessDeleteText(symbol);
|
||||
} while (api.TessResultIteratorNext(ri, level) == ITessAPI.TRUE);
|
||||
}
|
||||
|
||||
assertTrue(true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of ResultRenderer method, of class TessAPI.
|
||||
*
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@Test
|
||||
public void testResultRenderer() throws Exception {
|
||||
logger.info("TessResultRenderer");
|
||||
String image = String.format("%s/%s", testResourcesDataPath, "eurotext.tif");
|
||||
String output = "capi-test.txt";
|
||||
int set_only_init_params = FALSE;
|
||||
int oem = TessOcrEngineMode.OEM_DEFAULT;
|
||||
PointerByReference configs = null;
|
||||
int configs_size = 0;
|
||||
|
||||
String[] params = {"load_system_dawg", "tessedit_char_whitelist"};
|
||||
String vals[] = {"F", ""}; //0123456789-.IThisalotfpnex
|
||||
PointerByReference vars_vec = new PointerByReference();
|
||||
vars_vec.setPointer(new StringArray(params));
|
||||
PointerByReference vars_values = new PointerByReference();
|
||||
vars_values.setPointer(new StringArray(vals));
|
||||
NativeSize vars_vec_size = new NativeSize(params.length);
|
||||
|
||||
api.TessBaseAPISetOutputName(handle, output);
|
||||
|
||||
int rc = api.TessBaseAPIInit4(handle, datapath, language,
|
||||
oem, configs, configs_size, vars_vec, vars_values, vars_vec_size, set_only_init_params);
|
||||
|
||||
if (rc != 0) {
|
||||
api.TessBaseAPIDelete(handle);
|
||||
logger.error("Could not initialize tesseract.");
|
||||
return;
|
||||
}
|
||||
|
||||
String outputbase = "test/test-results/outputbase";
|
||||
TessResultRenderer renderer = api.TessHOcrRendererCreate(outputbase);
|
||||
api.TessResultRendererInsert(renderer, api.TessBoxTextRendererCreate(outputbase));
|
||||
api.TessResultRendererInsert(renderer, api.TessTextRendererCreate(outputbase));
|
||||
String dataPath = api.TessBaseAPIGetDatapath(handle);
|
||||
api.TessResultRendererInsert(renderer, api.TessPDFRendererCreate(outputbase, dataPath));
|
||||
int result = api.TessBaseAPIProcessPages(handle, image, null, 0, renderer);
|
||||
|
||||
if (result == FALSE) {
|
||||
logger.error("Error during processing.");
|
||||
return;
|
||||
}
|
||||
|
||||
for (; renderer != null; renderer = api.TessResultRendererNext(renderer)) {
|
||||
String ext = api.TessResultRendererExtention(renderer).getString(0);
|
||||
logger.info(String.format("TessResultRendererExtention: %s\nTessResultRendererTitle: %s\nTessResultRendererImageNum: %d",
|
||||
ext,
|
||||
api.TessResultRendererTitle(renderer).getString(0),
|
||||
api.TessResultRendererImageNum(renderer)));
|
||||
}
|
||||
|
||||
api.TessDeleteResultRenderer(renderer);
|
||||
assertTrue(new File(outputbase + ".pdf").exists());
|
||||
}
|
||||
}
|
267
NGCC/Tess4J/test/net/sourceforge/tess4j/Tesseract1Test.java
Normal file
267
NGCC/Tess4J/test/net/sourceforge/tess4j/Tesseract1Test.java
Normal file
@@ -0,0 +1,267 @@
|
||||
/**
|
||||
* Copyright @ 2010 Quan Nguyen
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||
* use this file except in compliance with the License. You may obtain a copy of
|
||||
* the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations under
|
||||
* the License.
|
||||
*/
|
||||
package net.sourceforge.tess4j;
|
||||
|
||||
import java.awt.Rectangle;
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Arrays;
|
||||
|
||||
import javax.imageio.IIOImage;
|
||||
import javax.imageio.ImageIO;
|
||||
|
||||
import net.sourceforge.tess4j.util.LoggHelper;
|
||||
import net.sourceforge.tess4j.util.Utils;
|
||||
import net.sourceforge.tess4j.util.ImageHelper;
|
||||
import net.sourceforge.tess4j.util.ImageIOHelper;
|
||||
import net.sourceforge.tess4j.ITesseract.RenderedFormat;
|
||||
import net.sourceforge.tess4j.ITessAPI.TessPageIteratorLevel;
|
||||
|
||||
import com.recognition.software.jdeskew.ImageDeskew;
|
||||
|
||||
import org.junit.After;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.Before;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static org.junit.Assert.assertArrayEquals;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
public class Tesseract1Test {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(new LoggHelper().toString());
|
||||
static final double MINIMUM_DESKEW_THRESHOLD = 0.05d;
|
||||
ITesseract instance;
|
||||
|
||||
private final String datapath = ".";
|
||||
private final String testResourcesDataPath = "test/resources/test-data";
|
||||
private final String expOCRResult = "The (quick) [brown] {fox} jumps!\nOver the $43,456.78 <lazy> #90 dog";
|
||||
|
||||
@BeforeClass
|
||||
public static void setUpClass() throws Exception {
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
public static void tearDownClass() throws Exception {
|
||||
}
|
||||
|
||||
@Before
|
||||
public void setUp() {
|
||||
instance = new Tesseract1();
|
||||
instance.setDatapath(new File(datapath).getPath());
|
||||
}
|
||||
|
||||
@After
|
||||
public void tearDown() {
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of doOCR method, of class Tesseract1.
|
||||
*
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@Test
|
||||
public void testDoOCR_File() throws Exception {
|
||||
logger.info("doOCR on a PNG image");
|
||||
File imageFile = new File(testResourcesDataPath, "eurotext.png");
|
||||
String expResult = expOCRResult;
|
||||
String result = instance.doOCR(imageFile);
|
||||
logger.info(result);
|
||||
assertEquals(expResult, result.substring(0, expResult.length()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of doOCR method, of class Tesseract.
|
||||
*
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@Test
|
||||
public void testDoOCR_UNLV_Zone_File() throws Exception {
|
||||
logger.info("doOCR on a PNG image with UNLV zone file .uzn");
|
||||
//UNLV zone format: left top width height label
|
||||
File imageFile = new File(testResourcesDataPath, "eurotext_unlv.png");
|
||||
String expResult = "& duck/goose, as 12.5% of E-mail\n\n"
|
||||
+ "from aspammer@website.com is spam.\n\n"
|
||||
+ "The (quick) [brown] {fox} jumps!\n"
|
||||
+ "Over the $43,456.78 <lazy> #90 dog";
|
||||
String result = instance.doOCR(imageFile);
|
||||
logger.info(result);
|
||||
assertEquals(expResult, result.trim());
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of doOCR method, of class Tesseract.
|
||||
*
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@Test
|
||||
public void testDoOCR_File_With_Configs() throws Exception {
|
||||
logger.info("doOCR with configs");
|
||||
File imageFile = new File(testResourcesDataPath, "eurotext.png");
|
||||
String expResult = "[-0123456789.\n ]+";
|
||||
List<String> configs = Arrays.asList("digits");
|
||||
instance.setConfigs(configs);
|
||||
String result = instance.doOCR(imageFile);
|
||||
logger.info(result);
|
||||
assertTrue(result.matches(expResult));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of doOCR method, of class Tesseract1.
|
||||
*
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@Test
|
||||
public void testDoOCR_File_Rectangle() throws Exception {
|
||||
logger.info("doOCR on a BMP image with bounding rectangle");
|
||||
File imageFile = new File(testResourcesDataPath, "eurotext.bmp");
|
||||
Rectangle rect = new Rectangle(0, 0, 1024, 800); // define an equal or smaller region of interest on the image
|
||||
String expResult = expOCRResult;
|
||||
String result = instance.doOCR(imageFile, rect);
|
||||
logger.info(result);
|
||||
assertEquals(expResult, result.substring(0, expResult.length()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of doOCR method, of class Tesseract1.
|
||||
*
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@Test
|
||||
public void testDoOCR_PDF() throws Exception {
|
||||
logger.info("doOCR on a PDF document");
|
||||
File imageFile = new File(testResourcesDataPath, "eurotext.pdf");
|
||||
List<IIOImage> imageList = ImageIOHelper.getIIOImageList(imageFile);
|
||||
String expResult = expOCRResult;
|
||||
String result = instance.doOCR(imageList, null);
|
||||
logger.info(result);
|
||||
assertEquals(expResult, result.substring(0, expResult.length()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of doOCR method, of class Tesseract1.
|
||||
*
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@Test
|
||||
public void testDoOCR_BufferedImage() throws Exception {
|
||||
logger.info("doOCR on a buffered image of a PNG");
|
||||
File imageFile = new File(testResourcesDataPath, "eurotext.png");
|
||||
BufferedImage bi = ImageIO.read(imageFile);
|
||||
String expResult = expOCRResult;
|
||||
String result = instance.doOCR(bi);
|
||||
logger.info(result);
|
||||
assertEquals(expResult, result.substring(0, expResult.length()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of deskew algorithm.
|
||||
*
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@Test
|
||||
public void testDoOCR_SkewedImage() throws Exception {
|
||||
logger.info("doOCR on a skewed PNG image");
|
||||
File imageFile = new File(testResourcesDataPath, "eurotext_deskew.png");
|
||||
BufferedImage bi = ImageIO.read(imageFile);
|
||||
ImageDeskew id = new ImageDeskew(bi);
|
||||
double imageSkewAngle = id.getSkewAngle(); // determine skew angle
|
||||
if ((imageSkewAngle > MINIMUM_DESKEW_THRESHOLD || imageSkewAngle < -(MINIMUM_DESKEW_THRESHOLD))) {
|
||||
bi = ImageHelper.rotateImage(bi, -imageSkewAngle); // deskew image
|
||||
}
|
||||
|
||||
String expResult = expOCRResult;
|
||||
String result = instance.doOCR(bi);
|
||||
logger.info(result);
|
||||
assertEquals(expResult, result.substring(0, expResult.length()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of createDocuments method, of class Tesseract.
|
||||
*
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@Test
|
||||
public void testCreateDocuments() throws Exception {
|
||||
logger.info("createDocuments for an image");
|
||||
File imageFile1 = new File(testResourcesDataPath, "eurotext.pdf");
|
||||
File imageFile2 = new File(testResourcesDataPath, "eurotext.png");
|
||||
String outputbase1 = "test/test-results/docrenderer1-1";
|
||||
String outputbase2 = "test/test-results/docrenderer1-2";
|
||||
List<RenderedFormat> formats = new ArrayList<RenderedFormat>(Arrays.asList(RenderedFormat.HOCR, RenderedFormat.PDF, RenderedFormat.TEXT));
|
||||
instance.createDocuments(new String[]{imageFile1.getPath(), imageFile2.getPath()}, new String[]{outputbase1, outputbase2}, formats);
|
||||
assertTrue(new File(outputbase1 + ".pdf").exists());
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of getWords method, of class Tesseract1.
|
||||
*
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@Test
|
||||
public void testGetWords() throws Exception {
|
||||
logger.info("getWords");
|
||||
File imageFile = new File(testResourcesDataPath, "eurotext.tif");
|
||||
|
||||
String expResult = "The (quick) [brown] {fox} jumps!\nOver the $43,456.78 <lazy> #90 dog";
|
||||
String[] expResults = expResult.split("\\s");
|
||||
|
||||
int pageIteratorLevel = TessPageIteratorLevel.RIL_WORD;
|
||||
logger.info("PageIteratorLevel: " + Utils.getConstantName(pageIteratorLevel, TessPageIteratorLevel.class));
|
||||
BufferedImage bi = ImageIO.read(imageFile);
|
||||
List<Word> result = instance.getWords(bi, pageIteratorLevel);
|
||||
|
||||
// print the complete result
|
||||
for (Word word : result) {
|
||||
logger.info(word.toString());
|
||||
}
|
||||
|
||||
List<String> text = new ArrayList<String>();
|
||||
for (Word word : result.subList(0, expResults.length)) {
|
||||
text.add(word.getText());
|
||||
}
|
||||
|
||||
assertArrayEquals(expResults, text.toArray());
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of getSegmentedRegions method, of class Tesseract1.
|
||||
*
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@Test
|
||||
public void testGetSegmentedRegions() throws Exception {
|
||||
logger.info("getSegmentedRegions at given TessPageIteratorLevel");
|
||||
File imageFile = new File(testResourcesDataPath, "eurotext.png");
|
||||
BufferedImage bi = ImageIO.read(imageFile);
|
||||
int level = TessPageIteratorLevel.RIL_SYMBOL;
|
||||
logger.info("PageIteratorLevel: " + Utils.getConstantName(level, TessPageIteratorLevel.class));
|
||||
List<Rectangle> result = instance.getSegmentedRegions(bi, level);
|
||||
for (int i = 0; i < result.size(); i++) {
|
||||
Rectangle rect = result.get(i);
|
||||
logger.info(String.format("Box[%d]: x=%d, y=%d, w=%d, h=%d", i, rect.x, rect.y, rect.width, rect.height));
|
||||
}
|
||||
|
||||
assertTrue(result.size() > 0);
|
||||
}
|
||||
}
|
267
NGCC/Tess4J/test/net/sourceforge/tess4j/TesseractTest.java
Normal file
267
NGCC/Tess4J/test/net/sourceforge/tess4j/TesseractTest.java
Normal file
@@ -0,0 +1,267 @@
|
||||
/**
|
||||
* Copyright @ 2010 Quan Nguyen
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||
* use this file except in compliance with the License. You may obtain a copy of
|
||||
* the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations under
|
||||
* the License.
|
||||
*/
|
||||
package net.sourceforge.tess4j;
|
||||
|
||||
import java.awt.Rectangle;
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.io.File;
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
|
||||
import javax.imageio.IIOImage;
|
||||
import javax.imageio.ImageIO;
|
||||
|
||||
import net.sourceforge.tess4j.util.ImageHelper;
|
||||
import net.sourceforge.tess4j.util.ImageIOHelper;
|
||||
import net.sourceforge.tess4j.util.LoggHelper;
|
||||
import net.sourceforge.tess4j.util.Utils;
|
||||
|
||||
import net.sourceforge.tess4j.ITesseract.RenderedFormat;
|
||||
import net.sourceforge.tess4j.ITessAPI.TessPageIteratorLevel;
|
||||
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
import com.recognition.software.jdeskew.ImageDeskew;
|
||||
|
||||
import org.junit.After;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.Before;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
public class TesseractTest {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(new LoggHelper().toString());
|
||||
static final double MINIMUM_DESKEW_THRESHOLD = 0.05d;
|
||||
ITesseract instance;
|
||||
|
||||
private final String datapath = ".";
|
||||
private final String testResourcesDataPath = "test/resources/test-data";
|
||||
private final String expOCRResult = "The (quick) [brown] {fox} jumps!\nOver the $43,456.78 <lazy> #90 dog";
|
||||
|
||||
@BeforeClass
|
||||
public static void setUpClass() throws Exception {
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
public static void tearDownClass() throws Exception {
|
||||
}
|
||||
|
||||
@Before
|
||||
public void setUp() {
|
||||
instance = new Tesseract();
|
||||
instance.setDatapath(new File(datapath).getPath());
|
||||
}
|
||||
|
||||
@After
|
||||
public void tearDown() {
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of doOCR method, of class Tesseract.
|
||||
*
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@Test
|
||||
public void testDoOCR_File() throws Exception {
|
||||
logger.info("doOCR on a PNG image");
|
||||
File imageFile = new File(testResourcesDataPath, "eurotext.png");
|
||||
String expResult = expOCRResult;
|
||||
String result = instance.doOCR(imageFile);
|
||||
logger.info(result);
|
||||
assertEquals(expResult, result.substring(0, expResult.length()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of doOCR method, of class Tesseract.
|
||||
*
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@Test
|
||||
public void testDoOCR_UNLV_Zone_File() throws Exception {
|
||||
logger.info("doOCR on a PNG image with UNLV zone file .uzn");
|
||||
//UNLV zone format: left top width height label
|
||||
File imageFile = new File(testResourcesDataPath, "eurotext_unlv.png");
|
||||
String expResult = "& duck/goose, as 12.5% of E-mail\n\n"
|
||||
+ "from aspammer@website.com is spam.\n\n"
|
||||
+ "The (quick) [brown] {fox} jumps!\n"
|
||||
+ "Over the $43,456.78 <lazy> #90 dog";
|
||||
String result = instance.doOCR(imageFile);
|
||||
logger.info(result);
|
||||
assertEquals(expResult, result.trim());
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of doOCR method, of class Tesseract.
|
||||
*
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@Test
|
||||
public void testDoOCR_File_With_Configs() throws Exception {
|
||||
logger.info("doOCR with configs");
|
||||
File imageFile = new File(testResourcesDataPath, "eurotext.png");
|
||||
String expResult = "[-0123456789.\n ]+";
|
||||
List<String> configs = Arrays.asList("digits");
|
||||
instance.setConfigs(configs);
|
||||
String result = instance.doOCR(imageFile);
|
||||
logger.info(result);
|
||||
assertTrue(result.matches(expResult));
|
||||
instance.setConfigs(null); // since Tesseract instance is a singleton, clear configs so the effects do not carry on into subsequent runs.
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of doOCR method, of class Tesseract.
|
||||
*
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@Test
|
||||
public void testDoOCR_File_Rectangle() throws Exception {
|
||||
logger.info("doOCR on a BMP image with bounding rectangle");
|
||||
File imageFile = new File(testResourcesDataPath, "eurotext.bmp");
|
||||
Rectangle rect = new Rectangle(0, 0, 1024, 800); // define an equal or smaller region of interest on the image
|
||||
String expResult = expOCRResult;
|
||||
String result = instance.doOCR(imageFile, rect);
|
||||
logger.info(result);
|
||||
assertEquals(expResult, result.substring(0, expResult.length()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of doOCR method, of class Tesseract.
|
||||
*
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@Test
|
||||
public void testDoOCR_PDF() throws Exception {
|
||||
logger.info("doOCR on a PDF document");
|
||||
File imageFile = new File(testResourcesDataPath, "eurotext.pdf");
|
||||
List<IIOImage> imageList = ImageIOHelper.getIIOImageList(imageFile);
|
||||
String expResult = expOCRResult;
|
||||
String result = instance.doOCR(imageList, null);
|
||||
logger.info(result);
|
||||
assertEquals(expResult, result.substring(0, expResult.length()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of doOCR method, of class Tesseract.
|
||||
*
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@Test
|
||||
public void testDoOCR_BufferedImage() throws Exception {
|
||||
logger.info("doOCR on a buffered image of a PNG");
|
||||
File imageFile = new File(testResourcesDataPath, "eurotext.png");
|
||||
BufferedImage bi = ImageIO.read(imageFile);
|
||||
String expResult = expOCRResult;
|
||||
String result = instance.doOCR(bi);
|
||||
logger.info(result);
|
||||
assertEquals(expResult, result.substring(0, expResult.length()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of deskew algorithm.
|
||||
*
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@Test
|
||||
public void testDoOCR_SkewedImage() throws Exception {
|
||||
logger.info("doOCR on a skewed PNG image");
|
||||
File imageFile = new File(testResourcesDataPath, "eurotext_deskew.png");
|
||||
BufferedImage bi = ImageIO.read(imageFile);
|
||||
ImageDeskew id = new ImageDeskew(bi);
|
||||
double imageSkewAngle = id.getSkewAngle(); // determine skew angle
|
||||
if ((imageSkewAngle > MINIMUM_DESKEW_THRESHOLD || imageSkewAngle < -(MINIMUM_DESKEW_THRESHOLD))) {
|
||||
bi = ImageHelper.rotateImage(bi, -imageSkewAngle); // deskew image
|
||||
}
|
||||
|
||||
String expResult = expOCRResult;
|
||||
String result = instance.doOCR(bi);
|
||||
logger.info(result);
|
||||
assertEquals(expResult, result.substring(0, expResult.length()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of createDocuments method, of class Tesseract.
|
||||
*
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@Test
|
||||
public void testCreateDocuments() throws Exception {
|
||||
logger.info("createDocuments for multiple images");
|
||||
File imageFile1 = new File(testResourcesDataPath, "eurotext.pdf");
|
||||
File imageFile2 = new File(testResourcesDataPath, "eurotext.png");
|
||||
String outputbase1 = "test/test-results/docrenderer-1";
|
||||
String outputbase2 = "test/test-results/docrenderer-2";
|
||||
List<RenderedFormat> formats = new ArrayList<RenderedFormat>(Arrays.asList(RenderedFormat.HOCR, RenderedFormat.PDF, RenderedFormat.TEXT));
|
||||
instance.createDocuments(new String[]{imageFile1.getPath(), imageFile2.getPath()}, new String[]{outputbase1, outputbase2}, formats);
|
||||
assertTrue(new File(outputbase1 + ".pdf").exists());
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of getWords method, of class Tesseract.
|
||||
*
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@Test
|
||||
public void testGetWords() throws Exception {
|
||||
logger.info("getWords");
|
||||
File imageFile = new File(testResourcesDataPath, "eurotext.tif");
|
||||
|
||||
String expResult = "The (quick) [brown] {fox} jumps!\nOver the $43,456.78 <lazy> #90 dog";
|
||||
String[] expResults = expResult.split("\\s");
|
||||
|
||||
int pageIteratorLevel = TessPageIteratorLevel.RIL_WORD;
|
||||
logger.info("PageIteratorLevel: " + Utils.getConstantName(pageIteratorLevel, TessPageIteratorLevel.class));
|
||||
BufferedImage bi = ImageIO.read(imageFile);
|
||||
List<Word> result = instance.getWords(bi, pageIteratorLevel);
|
||||
|
||||
//print the complete result
|
||||
for (Word word : result) {
|
||||
logger.info(word.toString());
|
||||
}
|
||||
|
||||
List<String> text = new ArrayList<String>();
|
||||
for (Word word : result.subList(0, expResults.length)) {
|
||||
text.add(word.getText());
|
||||
}
|
||||
|
||||
assertArrayEquals(expResults, text.toArray());
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of getSegmentedRegions method, of class Tesseract.
|
||||
*
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@Test
|
||||
public void testGetSegmentedRegions() throws Exception {
|
||||
logger.info("getSegmentedRegions at given TessPageIteratorLevel");
|
||||
File imageFile = new File(testResourcesDataPath, "eurotext.png");
|
||||
BufferedImage bi = ImageIO.read(imageFile);
|
||||
int level = TessPageIteratorLevel.RIL_SYMBOL;
|
||||
logger.info("PageIteratorLevel: " + Utils.getConstantName(level, TessPageIteratorLevel.class));
|
||||
List<Rectangle> result = instance.getSegmentedRegions(bi, level);
|
||||
for (int i = 0; i < result.size(); i++) {
|
||||
Rectangle rect = result.get(i);
|
||||
logger.info(String.format("Box[%d]: x=%d, y=%d, w=%d, h=%d", i, rect.x, rect.y, rect.width, rect.height));
|
||||
}
|
||||
|
||||
assertTrue(result.size() > 0);
|
||||
}
|
||||
}
|
@@ -0,0 +1,84 @@
|
||||
/**
|
||||
* Copyright @ 2008 Quan Nguyen
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||
* use this file except in compliance with the License. You may obtain a copy of
|
||||
* the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations under
|
||||
* the License.
|
||||
*/
|
||||
|
||||
package net.sourceforge.tess4j;
|
||||
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import java.io.File;
|
||||
import java.net.URISyntaxException;
|
||||
import java.net.URL;
|
||||
|
||||
|
||||
import net.sourceforge.tess4j.util.LoadLibs;
|
||||
|
||||
import net.sourceforge.tess4j.util.LoggHelper;
|
||||
import org.junit.Test;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
public class TestFolderExtraction {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(new LoggHelper().toString());
|
||||
|
||||
@Test
|
||||
public void testFolderExtraction() {
|
||||
|
||||
File tessDataFolder = null;
|
||||
|
||||
try {
|
||||
|
||||
/**
|
||||
* Loads the image from resources.
|
||||
*/
|
||||
String filename = String.format("%s/%s", "/test-data", "eurotext.pdf");
|
||||
URL defaultImage = getClass().getResource(filename);
|
||||
File imageFile = new File(defaultImage.toURI());
|
||||
|
||||
/**
|
||||
* Extracts <code>tessdata</code> folder into a temp folder.
|
||||
*/
|
||||
logger.info("Loading the tessdata folder into a temporary folder.");
|
||||
tessDataFolder = LoadLibs.extractTessResources("tessdata");
|
||||
|
||||
/**
|
||||
* Gets tesseract instance and sets data path.
|
||||
*/
|
||||
ITesseract instance = new Tesseract();
|
||||
|
||||
if (tessDataFolder != null) {
|
||||
logger.info(tessDataFolder.getAbsolutePath());
|
||||
instance.setDatapath(tessDataFolder.getParent());
|
||||
}
|
||||
|
||||
/**
|
||||
* Performs OCR on the image.
|
||||
*/
|
||||
String result = instance.doOCR(imageFile);
|
||||
logger.info(result);
|
||||
|
||||
} catch (TesseractException e) {
|
||||
logger.error(e.getMessage());
|
||||
logger.error(e.getMessage(), e);
|
||||
} catch (URISyntaxException e) {
|
||||
logger.error(e.getMessage(), e);
|
||||
}
|
||||
|
||||
// checks if tessdata folder exists
|
||||
assertTrue(tessDataFolder != null && tessDataFolder.exists());
|
||||
}
|
||||
|
||||
}
|
45
NGCC/Tess4J/test/net/sourceforge/tess4j/Word.java
Normal file
45
NGCC/Tess4J/test/net/sourceforge/tess4j/Word.java
Normal file
@@ -0,0 +1,45 @@
|
||||
package net.sourceforge.tess4j;
|
||||
|
||||
import java.awt.Rectangle;
|
||||
|
||||
/**
 * Holds one recognized item of a Tesseract result: its text, the confidence
 * value reported for it, and its bounding box.
 * <p>
 * All fields are final. The {@link Rectangle} is stored and handed back by
 * reference, without copying.
 */
class Word {

    private final String text;
    private final float confidence;
    private final Rectangle rect;

    /**
     * @param text the recognized text
     * @param confidence the confidence value for the text
     * @param rect the bounding box of the text
     */
    public Word(String text, float confidence, Rectangle rect) {
        this.rect = rect;
        this.confidence = confidence;
        this.text = text;
    }

    /**
     * @return the recognized text
     */
    public String getText() {
        return this.text;
    }

    /**
     * @return the confidence value
     */
    public float getConfidence() {
        return this.confidence;
    }

    /**
     * @return the bounding box, the same instance passed to the constructor
     */
    public Rectangle getRect() {
        return this.rect;
    }

    /**
     * Formats the text, confidence and bounding box (x, y, width, height) on a
     * single line.
     *
     * @return the formatted description
     */
    @Override
    public String toString() {
        final String layout = "%s\t[Confidence: %f Bounding box: %d %d %d %d]";
        final Object[] values = {text, confidence, rect.x, rect.y, rect.width, rect.height};
        return String.format(layout, values);
    }
}
|
@@ -0,0 +1,128 @@
|
||||
/*
|
||||
* Copyright 2014 Quan Nguyen.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package net.sourceforge.tess4j.util;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import org.junit.After;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.Before;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
import static org.junit.Assert.*;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
public class PdfUtilitiesTest {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(new LoggHelper().toString());
|
||||
private final String TEST_RESOURCES_DATA_PATH = "test/resources/test-data";
|
||||
|
||||
@BeforeClass
|
||||
public static void setUpClass() {
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
public static void tearDownClass() {
|
||||
}
|
||||
|
||||
@Before
|
||||
public void setUp() {
|
||||
System.setProperty(PdfUtilities.PDF_LIBRARY, PdfUtilities.PDFBOX); // Note: comment out to test Ghostscript
|
||||
}
|
||||
|
||||
@After
|
||||
public void tearDown() {
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of convertPdf2Tiff method, of class PdfUtilities.
|
||||
*
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@Test
|
||||
public void testConvertPdf2Tiff() throws Exception {
|
||||
logger.info("convertPdf2Tiff");
|
||||
File inputPdfFile = new File(TEST_RESOURCES_DATA_PATH, "eurotext.pdf");
|
||||
File result = PdfUtilities.convertPdf2Tiff(inputPdfFile);
|
||||
result.deleteOnExit();
|
||||
assertTrue(result.exists());
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of convertPdf2Png method, of class PdfUtilities.
|
||||
*
|
||||
* @throws java.io.IOException
|
||||
*/
|
||||
@Test
|
||||
public void testConvertPdf2Png() throws IOException {
|
||||
logger.info("convertPdf2Png");
|
||||
File inputPdfFile = new File(TEST_RESOURCES_DATA_PATH, "eurotext.pdf");
|
||||
File[] results = PdfUtilities.convertPdf2Png(inputPdfFile);
|
||||
assertTrue(results.length > 0);
|
||||
|
||||
//clean up
|
||||
File parentDir = results[0].getParentFile();
|
||||
for (File result : results) {
|
||||
result.delete();
|
||||
}
|
||||
parentDir.delete();
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of splitPdf method, of class PdfUtilities.
|
||||
*/
|
||||
@Test
|
||||
public void testSplitPdf() {
|
||||
logger.info("splitPdf");
|
||||
File inputPdfFile = new File(TEST_RESOURCES_DATA_PATH, "multipage-pdf.pdf");
|
||||
File outputPdfFile = new File("test/test-results/multipage-pdf_splitted.pdf");
|
||||
int startPage = 2;
|
||||
int endPage = 3;
|
||||
int expResult = 2;
|
||||
PdfUtilities.splitPdf(inputPdfFile, outputPdfFile, startPage, endPage);
|
||||
int pageCount = PdfUtilities.getPdfPageCount(outputPdfFile);
|
||||
assertEquals(expResult, pageCount);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of getPdfPageCount method, of class PdfUtilities.
|
||||
*/
|
||||
@Test
|
||||
public void testGetPdfPageCount() {
|
||||
logger.info("getPdfPageCount");
|
||||
File inputPdfFile = new File(TEST_RESOURCES_DATA_PATH, "multipage-pdf.pdf");
|
||||
int expResult = 5;
|
||||
int result = PdfUtilities.getPdfPageCount(inputPdfFile);
|
||||
assertEquals(expResult, result);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of mergePdf method, of class PdfUtilities.
|
||||
*/
|
||||
@Test
|
||||
public void testMergePdf() {
|
||||
logger.info("mergePdf");
|
||||
File pdfPartOne = new File(TEST_RESOURCES_DATA_PATH, "eurotext.pdf");
|
||||
File pdfPartTwo = new File(TEST_RESOURCES_DATA_PATH, "multipage-pdf.pdf");
|
||||
int expResult = 6;
|
||||
File outputPdfFile = new File("test/test-results", "multipage-pdf_merged.pdf");
|
||||
File[] inputPdfFiles = {pdfPartOne, pdfPartTwo};
|
||||
PdfUtilities.mergePdf(inputPdfFiles, outputPdfFile);
|
||||
assertEquals(expResult, PdfUtilities.getPdfPageCount(outputPdfFile));
|
||||
}
|
||||
|
||||
}
|
BIN
NGCC/Tess4J/test/resources/test-data/eurotext.bmp
Normal file
BIN
NGCC/Tess4J/test/resources/test-data/eurotext.bmp
Normal file
Binary file not shown.
After Width: | Height: | Size: 100 KiB |
BIN
NGCC/Tess4J/test/resources/test-data/eurotext.pdf
Normal file
BIN
NGCC/Tess4J/test/resources/test-data/eurotext.pdf
Normal file
Binary file not shown.
BIN
NGCC/Tess4J/test/resources/test-data/eurotext.png
Normal file
BIN
NGCC/Tess4J/test/resources/test-data/eurotext.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 14 KiB |
BIN
NGCC/Tess4J/test/resources/test-data/eurotext.tif
Normal file
BIN
NGCC/Tess4J/test/resources/test-data/eurotext.tif
Normal file
Binary file not shown.
BIN
NGCC/Tess4J/test/resources/test-data/eurotext_deskew.png
Normal file
BIN
NGCC/Tess4J/test/resources/test-data/eurotext_deskew.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 200 KiB |
BIN
NGCC/Tess4J/test/resources/test-data/eurotext_unlv.png
Normal file
BIN
NGCC/Tess4J/test/resources/test-data/eurotext_unlv.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 14 KiB |
3
NGCC/Tess4J/test/resources/test-data/eurotext_unlv.uzn
Normal file
3
NGCC/Tess4J/test/resources/test-data/eurotext_unlv.uzn
Normal file
@@ -0,0 +1,3 @@
|
||||
97 162 747 50 ThirdLine
|
||||
97 209 828 55 FourthLine
|
||||
92 56 810 107 First2Lines
|
BIN
NGCC/Tess4J/test/resources/test-data/multipage-pdf.pdf
Normal file
BIN
NGCC/Tess4J/test/resources/test-data/multipage-pdf.pdf
Normal file
Binary file not shown.
Reference in New Issue
Block a user