fix bugs
This commit is contained in:
parent
21a86f242e
commit
22c7aa27ec
116
NGCC/Tess4J/build.xml
Normal file
116
NGCC/Tess4J/build.xml
Normal file
@ -0,0 +1,116 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!-- You may freely edit this file. See commented blocks below for -->
|
||||||
|
<!-- some examples of how to customize the build. -->
|
||||||
|
<!-- (If you delete it and reopen the project it will be recreated.) -->
|
||||||
|
<!-- By default, only the Clean and Build commands use this build script. -->
|
||||||
|
<!-- Commands such as Run, Debug, and Test only use this build script if -->
|
||||||
|
<!-- the Compile on Save feature is turned off for the project. -->
|
||||||
|
<!-- You can turn off the Compile on Save (or Deploy on Save) setting -->
|
||||||
|
<!-- in the project's Project Properties dialog box.-->
|
||||||
|
<project name="Tess4J" default="default" basedir=".">
    <description>Builds, tests, and runs the project Tess4J.</description>
    <import file="nbproject/build-impl.xml"/>
    <!-- Release version; used below to name the source distribution archive. -->
    <property name="version" value="3.4.8"/>

    <!-- Hook run before javac: copies the bundled Windows native libraries
         (32- and 64-bit folders under lib) into the compiled classes directory,
         presumably so that they are packaged together with the classes. -->
    <target name="-pre-compile">
        <copy todir="${build.classes.dir}">
            <fileset dir="lib" includes="win32-x86/**,win32-x86-64/**" />
        </copy>
    </target>

    <!-- Hook run after the JAR is built: moves a jar named
         ${ant.project.name}.jar from the dist directory to the project root
         (if such a file exists) and removes the dist/lib directory.
         NOTE(review): nbproject/project.properties names the jar
         tess4j-${version}.jar, so this include pattern may match nothing;
         confirm the intended jar name. -->
    <target name="-post-jar">
        <move todir="./">
            <fileset dir="${dist.dir}" includes="${ant.project.name}.jar" />
        </move>
        <delete dir="${dist.dir}/lib"/>
    </target>

    <!-- Hook run after clean: removes the jar from the project root and the
         files left in the test-results folder of the test source tree. -->
    <target name="-post-clean">
        <delete file="${ant.project.name}.jar"/>
        <delete>
            <!-- erroronmissingdir="false": without it the delete task fails the
                 whole build when test-results does not exist, e.g. on a fresh
                 checkout or when clean is run twice in a row. -->
            <fileset dir="${test.src.dir}/test-results" includes="**" erroronmissingdir="false"/>
        </delete>
    </target>

    <!-- Packages the project sources into
         ${dist.dir}/${ant.project.name}-${version}-src.zip after a clean build.
         Hidden files, the build directory, doc and native are left out.
         NOTE(review): the dist directory itself is not excluded (that exclude is
         commented out below), so freshly built artifacts are zipped as well;
         confirm this is intended. -->
    <target name="dist" depends="clean,jar"
            description="create binary and source distribution archives">
        <mkdir dir="${dist.dir}"/>
        <!-- destfile replaces the deprecated zipfile attribute of the zip task. -->
        <zip destfile="${dist.dir}/${ant.project.name}-${version}-src.zip">
            <zipfileset dir="." prefix="${ant.project.name}">
                <exclude name="**/.*"/>
                <!--<exclude name="${dist.dir}/**"/>-->
                <exclude name="${build.dir}/**"/>
                <exclude name="doc/**"/>
                <!--<exclude name="${ant.project.name}.jar"/>-->
                <exclude name="native/**"/>
            </zipfileset>
        </zip>
        <!--
        <zip zipfile="${dist.dir}/${ant.project.name}-${version}.zip">
            <zipfileset dir="." includes="*,lib/**,tessdata/**,${dist.jar}" prefix="${ant.project.name}"
                        excludes="build.xml,src,build,dist,doc,test,nbproject,native,eurotext.*">
                <exclude name="*.mf"/>
            </zipfileset>
        </zip>
        -->
        <delete dir="${dist.dir}/lib"/>
    </target>
    <!--

    There exist several targets which are by default empty and which can be
    used for execution of your tasks. These targets are usually executed
    before and after some main targets. They are:

      -pre-init:                 called before initialization of project properties
      -post-init:                called after initialization of project properties
      -pre-compile:              called before javac compilation
      -post-compile:             called after javac compilation
      -pre-compile-single:       called before javac compilation of single file
      -post-compile-single:      called after javac compilation of single file
      -pre-compile-test:         called before javac compilation of JUnit tests
      -post-compile-test:        called after javac compilation of JUnit tests
      -pre-compile-test-single:  called before javac compilation of single JUnit test
      -post-compile-test-single: called after javac compilation of single JUnit test
      -pre-jar:                  called before JAR building
      -post-jar:                 called after JAR building
      -post-clean:               called after cleaning build products

    (Targets beginning with '-' are not intended to be called on their own.)

    Example of inserting an obfuscator after compilation could look like this:

        <target name="-post-compile">
            <obfuscate>
                <fileset dir="${build.classes.dir}"/>
            </obfuscate>
        </target>

    For list of available properties check the imported
    nbproject/build-impl.xml file.


    Another way to customize the build is by overriding existing main targets.
    The targets of interest are:

      -init-macrodef-javac:     defines macro for javac compilation
      -init-macrodef-junit:     defines macro for junit execution
      -init-macrodef-debug:     defines macro for class debugging
      -init-macrodef-java:      defines macro for class execution
      -do-jar-with-manifest:    JAR building (if you are using a manifest)
      -do-jar-without-manifest: JAR building (if you are not using a manifest)
      run:                      execution of project
      -javadoc-build:           Javadoc generation
      test-report:              JUnit report generation

    An example of overriding the target for project execution could look like this:

        <target name="run" depends="Tess4J-impl.jar">
            <exec dir="bin" executable="launcher.exe">
                <arg file="${dist.jar}"/>
            </exec>
        </target>

    Notice that the overridden target depends on the jar target and not only on
    the compile target as the regular run target does. Again, for a list of available
    properties which you can use, check the target you are overriding in the
    nbproject/build-impl.xml file.

    -->
</project>
|
32
NGCC/Tess4J/dist/README.TXT
vendored
Normal file
32
NGCC/Tess4J/dist/README.TXT
vendored
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
========================
|
||||||
|
BUILD OUTPUT DESCRIPTION
|
||||||
|
========================
|
||||||
|
|
||||||
|
When you build a Java application project that has a main class, the IDE
|
||||||
|
automatically copies all of the JAR
|
||||||
|
files on the project's classpath to your project's dist/lib folder. The IDE
|
||||||
|
also adds each of the JAR files to the Class-Path element in the application
|
||||||
|
JAR file's manifest file (MANIFEST.MF).
|
||||||
|
|
||||||
|
To run the project from the command line, go to the dist folder and
|
||||||
|
type the following:
|
||||||
|
|
||||||
|
java -jar "tess4j-3.4.8.jar"
|
||||||
|
|
||||||
|
To distribute this project, zip up the dist folder (including the lib folder)
|
||||||
|
and distribute the ZIP file.
|
||||||
|
|
||||||
|
Notes:
|
||||||
|
|
||||||
|
* If two JAR files on the project classpath have the same name, only the first
|
||||||
|
JAR file is copied to the lib folder.
|
||||||
|
* Only JAR files are copied to the lib folder.
|
||||||
|
If the classpath contains other types of files or folders, these files (folders)
|
||||||
|
are not copied.
|
||||||
|
* If a library on the project's classpath also has a Class-Path element
|
||||||
|
specified in the manifest, the content of the Class-Path element has to be on
|
||||||
|
the project's runtime path.
|
||||||
|
* To set a main class in a standard Java project, right-click the project node
|
||||||
|
in the Projects window and choose Properties. Then click Run and enter the
|
||||||
|
class name in the Main Class field. Alternatively, you can manually type the
|
||||||
|
class name in the manifest Main-Class element.
|
BIN
NGCC/Tess4J/dist/tess4j-3.4.8.jar
vendored
Normal file
BIN
NGCC/Tess4J/dist/tess4j-3.4.8.jar
vendored
Normal file
Binary file not shown.
BIN
NGCC/Tess4J/lib/commons-beanutils-1.9.2.jar
Normal file
BIN
NGCC/Tess4J/lib/commons-beanutils-1.9.2.jar
Normal file
Binary file not shown.
BIN
NGCC/Tess4J/lib/commons-io-2.6.jar
Normal file
BIN
NGCC/Tess4J/lib/commons-io-2.6.jar
Normal file
Binary file not shown.
BIN
NGCC/Tess4J/lib/commons-logging-1.2.jar
Normal file
BIN
NGCC/Tess4J/lib/commons-logging-1.2.jar
Normal file
Binary file not shown.
BIN
NGCC/Tess4J/lib/fontbox-2.0.9.jar
Normal file
BIN
NGCC/Tess4J/lib/fontbox-2.0.9.jar
Normal file
Binary file not shown.
BIN
NGCC/Tess4J/lib/ghost4j-1.0.1.jar
Normal file
BIN
NGCC/Tess4J/lib/ghost4j-1.0.1.jar
Normal file
Binary file not shown.
BIN
NGCC/Tess4J/lib/hamcrest-core-1.3.jar
Normal file
BIN
NGCC/Tess4J/lib/hamcrest-core-1.3.jar
Normal file
Binary file not shown.
BIN
NGCC/Tess4J/lib/itext-2.1.7.jar
Normal file
BIN
NGCC/Tess4J/lib/itext-2.1.7.jar
Normal file
Binary file not shown.
BIN
NGCC/Tess4J/lib/jai-imageio-core-1.4.0.jar
Normal file
BIN
NGCC/Tess4J/lib/jai-imageio-core-1.4.0.jar
Normal file
Binary file not shown.
BIN
NGCC/Tess4J/lib/jbig2-imageio-3.0.0.jar
Normal file
BIN
NGCC/Tess4J/lib/jbig2-imageio-3.0.0.jar
Normal file
Binary file not shown.
BIN
NGCC/Tess4J/lib/jboss-vfs-3.2.12.Final.jar
Normal file
BIN
NGCC/Tess4J/lib/jboss-vfs-3.2.12.Final.jar
Normal file
Binary file not shown.
BIN
NGCC/Tess4J/lib/jcl-over-slf4j-1.7.25.jar
Normal file
BIN
NGCC/Tess4J/lib/jcl-over-slf4j-1.7.25.jar
Normal file
Binary file not shown.
BIN
NGCC/Tess4J/lib/jna-4.1.0.jar
Normal file
BIN
NGCC/Tess4J/lib/jna-4.1.0.jar
Normal file
Binary file not shown.
BIN
NGCC/Tess4J/lib/jul-to-slf4j-1.7.25.jar
Normal file
BIN
NGCC/Tess4J/lib/jul-to-slf4j-1.7.25.jar
Normal file
Binary file not shown.
BIN
NGCC/Tess4J/lib/junit-4.12.jar
Normal file
BIN
NGCC/Tess4J/lib/junit-4.12.jar
Normal file
Binary file not shown.
BIN
NGCC/Tess4J/lib/lept4j-1.6.4.jar
Normal file
BIN
NGCC/Tess4J/lib/lept4j-1.6.4.jar
Normal file
Binary file not shown.
BIN
NGCC/Tess4J/lib/log4j-1.2.17.jar
Normal file
BIN
NGCC/Tess4J/lib/log4j-1.2.17.jar
Normal file
Binary file not shown.
BIN
NGCC/Tess4J/lib/log4j-over-slf4j-1.7.25.jar
Normal file
BIN
NGCC/Tess4J/lib/log4j-over-slf4j-1.7.25.jar
Normal file
Binary file not shown.
BIN
NGCC/Tess4J/lib/logback-classic-1.2.3.jar
Normal file
BIN
NGCC/Tess4J/lib/logback-classic-1.2.3.jar
Normal file
Binary file not shown.
BIN
NGCC/Tess4J/lib/logback-core-1.2.3.jar
Normal file
BIN
NGCC/Tess4J/lib/logback-core-1.2.3.jar
Normal file
Binary file not shown.
11
NGCC/Tess4J/lib/nblibraries.properties
Normal file
11
NGCC/Tess4J/lib/nblibraries.properties
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
# To change this template, choose Tools | Templates
|
||||||
|
# and open the template in the editor.
|
||||||
|
|
||||||
|
libs.hamcrest.classpath=\
|
||||||
|
${base}/hamcrest-core-1.3.jar
|
||||||
|
libs.hamcrest.displayName=Hamcrest 1.3
|
||||||
|
libs.hamcrest.prop-maven-dependencies=org.hamcrest:hamcrest-core:1.3:jar
|
||||||
|
libs.junit_4.classpath=\
|
||||||
|
${base}/junit-4.12.jar
|
||||||
|
libs.junit_4.displayName=JUnit 4.12
|
||||||
|
libs.junit_4.prop-maven-dependencies=junit:junit:4.12:jar
|
BIN
NGCC/Tess4J/lib/pdfbox-2.0.9.jar
Normal file
BIN
NGCC/Tess4J/lib/pdfbox-2.0.9.jar
Normal file
Binary file not shown.
BIN
NGCC/Tess4J/lib/pdfbox-tools-2.0.9.jar
Normal file
BIN
NGCC/Tess4J/lib/pdfbox-tools-2.0.9.jar
Normal file
Binary file not shown.
BIN
NGCC/Tess4J/lib/slf4j-api-1.7.25.jar
Normal file
BIN
NGCC/Tess4J/lib/slf4j-api-1.7.25.jar
Normal file
Binary file not shown.
BIN
NGCC/Tess4J/lib/win32-x86-64/libtesseract3051.dll
Normal file
BIN
NGCC/Tess4J/lib/win32-x86-64/libtesseract3051.dll
Normal file
Binary file not shown.
BIN
NGCC/Tess4J/lib/win32-x86/libtesseract3051.dll
Normal file
BIN
NGCC/Tess4J/lib/win32-x86/libtesseract3051.dll
Normal file
Binary file not shown.
BIN
NGCC/Tess4J/lib/xmlgraphics-commons-1.5.jar
Normal file
BIN
NGCC/Tess4J/lib/xmlgraphics-commons-1.5.jar
Normal file
Binary file not shown.
1793
NGCC/Tess4J/nbproject/build-impl.xml
Normal file
1793
NGCC/Tess4J/nbproject/build-impl.xml
Normal file
File diff suppressed because it is too large
Load Diff
8
NGCC/Tess4J/nbproject/genfiles.properties
Normal file
8
NGCC/Tess4J/nbproject/genfiles.properties
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
build.xml.data.CRC32=1dc6a699
|
||||||
|
build.xml.script.CRC32=f0eaf91d
|
||||||
|
build.xml.stylesheet.CRC32=28e38971@1.38.2.45
|
||||||
|
# This file is used by a NetBeans-based IDE to track changes in generated files such as build-impl.xml.
|
||||||
|
# Do not edit this file. You may delete it but then the IDE will never regenerate such files for you.
|
||||||
|
nbproject/build-impl.xml.data.CRC32=1f03e186
|
||||||
|
nbproject/build-impl.xml.script.CRC32=a52f4060
|
||||||
|
nbproject/build-impl.xml.stylesheet.CRC32=3a2fa800@1.88.0.48
|
0
NGCC/Tess4J/nbproject/private/config.properties
Normal file
0
NGCC/Tess4J/nbproject/private/config.properties
Normal file
8
NGCC/Tess4J/nbproject/private/private.properties
Normal file
8
NGCC/Tess4J/nbproject/private/private.properties
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
compile.on.save=true
|
||||||
|
do.depend=true
|
||||||
|
do.jar=true
|
||||||
|
do.jlink=false
|
||||||
|
javac.debug=false
|
||||||
|
javadoc.preview=true
|
||||||
|
jlink.strip=false
|
||||||
|
user.properties.file=C:\\Users\\Quan\\AppData\\Roaming\\NetBeans\\dev\\build.properties
|
7
NGCC/Tess4J/nbproject/private/private.xml
Normal file
7
NGCC/Tess4J/nbproject/private/private.xml
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project-private xmlns="http://www.netbeans.org/ns/project-private/1">
|
||||||
|
<editor-bookmarks xmlns="http://www.netbeans.org/ns/editor-bookmarks/2" lastBookmarkId="0"/>
|
||||||
|
<open-files xmlns="http://www.netbeans.org/ns/projectui-open-files/2">
|
||||||
|
<group/>
|
||||||
|
</open-files>
|
||||||
|
</project-private>
|
138
NGCC/Tess4J/nbproject/project.properties
Normal file
138
NGCC/Tess4J/nbproject/project.properties
Normal file
@ -0,0 +1,138 @@
|
|||||||
|
annotation.processing.enabled=true
|
||||||
|
annotation.processing.enabled.in.editor=false
|
||||||
|
annotation.processing.processors.list=
|
||||||
|
annotation.processing.run.all.processors=true
|
||||||
|
annotation.processing.source.output=${build.generated.sources.dir}/ap-source-output
|
||||||
|
application.desc=A Java wrapper for Tesseract OCR API
|
||||||
|
application.homepage=http://tess4j.sf.net
|
||||||
|
application.title=Tess4J
|
||||||
|
application.vendor=Quan Nguyen
|
||||||
|
build.classes.dir=${build.dir}/classes
|
||||||
|
build.classes.excludes=**/*.java,**/*.form
|
||||||
|
# This directory is removed when the project is cleaned:
|
||||||
|
build.dir=build
|
||||||
|
build.generated.dir=${build.dir}/generated
|
||||||
|
build.generated.sources.dir=${build.dir}/generated-sources
|
||||||
|
# Only compile against the classpath explicitly listed here:
|
||||||
|
build.sysclasspath=ignore
|
||||||
|
build.test.classes.dir=${build.dir}/test/classes
|
||||||
|
build.test.results.dir=${build.dir}/test/results
|
||||||
|
# Uncomment to specify the preferred debugger connection transport:
|
||||||
|
#debug.transport=dt_socket
|
||||||
|
debug.classpath=\
|
||||||
|
${run.classpath}
|
||||||
|
debug.modulepath=\
|
||||||
|
${run.modulepath}
|
||||||
|
debug.test.classpath=\
|
||||||
|
${run.test.classpath}
|
||||||
|
debug.test.modulepath=\
|
||||||
|
${run.test.modulepath}
|
||||||
|
# This directory is removed when the project is cleaned:
|
||||||
|
dist.dir=dist
|
||||||
|
dist.jar=${dist.dir}/tess4j-${version}.jar
|
||||||
|
dist.javadoc.dir=${dist.dir}/javadoc
|
||||||
|
endorsed.classpath=
|
||||||
|
excludes=
|
||||||
|
file.reference.commons-beanutils-1.9.2.jar=lib/commons-beanutils-1.9.2.jar
|
||||||
|
file.reference.commons-io-2.6.jar=lib/commons-io-2.6.jar
|
||||||
|
file.reference.commons-logging-1.2.jar=lib/commons-logging-1.2.jar
|
||||||
|
file.reference.fontbox-2.0.9.jar=lib/fontbox-2.0.9.jar
|
||||||
|
file.reference.ghost4j-1.0.1.jar=lib/ghost4j-1.0.1.jar
|
||||||
|
file.reference.itext-2.1.7.jar=lib/itext-2.1.7.jar
|
||||||
|
file.reference.jai-imageio-core-1.4.0.jar=lib/jai-imageio-core-1.4.0.jar
|
||||||
|
file.reference.jbig2-imageio-3.0.0.jar=lib/jbig2-imageio-3.0.0.jar
|
||||||
|
file.reference.jboss-vfs-3.2.12.Final.jar=lib/jboss-vfs-3.2.12.Final.jar
|
||||||
|
file.reference.jna-4.1.0.jar=lib/jna-4.1.0.jar
|
||||||
|
file.reference.jul-to-slf4j-1.7.25.jar=lib/jul-to-slf4j-1.7.25.jar
|
||||||
|
file.reference.lept4j-1.6.4.jar=lib/lept4j-1.6.4.jar
|
||||||
|
file.reference.log4j-1.2.17.jar=lib/log4j-1.2.17.jar
|
||||||
|
file.reference.logback-classic-1.2.3.jar=lib/logback-classic-1.2.3.jar
|
||||||
|
file.reference.logback-core-1.2.3.jar=lib/logback-core-1.2.3.jar
|
||||||
|
file.reference.pdfbox-2.0.9.jar=lib/pdfbox-2.0.9.jar
|
||||||
|
file.reference.pdfbox-tools-2.0.9.jar=lib/pdfbox-tools-2.0.9.jar
|
||||||
|
file.reference.slf4j-api-1.7.25.jar=lib/slf4j-api-1.7.25.jar
|
||||||
|
includes=**
|
||||||
|
jar.archive.disabled=${jnlp.enabled}
|
||||||
|
jar.compress=false
|
||||||
|
jar.index=${jnlp.enabled}
|
||||||
|
javac.classpath=\
|
||||||
|
${file.reference.ghost4j-1.0.1.jar}:\
|
||||||
|
${file.reference.jna-4.1.0.jar}:\
|
||||||
|
${file.reference.log4j-1.2.17.jar}:\
|
||||||
|
${file.reference.itext-2.1.7.jar}:\
|
||||||
|
${file.reference.commons-beanutils-1.9.2.jar}:\
|
||||||
|
${file.reference.commons-logging-1.2.jar}:\
|
||||||
|
${file.reference.slf4j-api-1.7.25.jar}:\
|
||||||
|
${file.reference.jul-to-slf4j-1.7.25.jar}:\
|
||||||
|
${file.reference.commons-io-2.6.jar}:\
|
||||||
|
${file.reference.jboss-vfs-3.2.12.Final.jar}:\
|
||||||
|
${file.reference.logback-classic-1.2.3.jar}:\
|
||||||
|
${file.reference.logback-core-1.2.3.jar}:\
|
||||||
|
${file.reference.jai-imageio-core-1.4.0.jar}:\
|
||||||
|
${file.reference.lept4j-1.6.4.jar}:\
|
||||||
|
${file.reference.pdfbox-2.0.9.jar}:\
|
||||||
|
${file.reference.pdfbox-tools-2.0.9.jar}:\
|
||||||
|
${file.reference.fontbox-2.0.9.jar}:\
|
||||||
|
${file.reference.jbig2-imageio-3.0.0.jar}
|
||||||
|
# Space-separated list of extra javac options
|
||||||
|
javac.compilerargs=
|
||||||
|
javac.deprecation=true
|
||||||
|
javac.external.vm=false
|
||||||
|
javac.modulepath=
|
||||||
|
javac.processormodulepath=
|
||||||
|
javac.processorpath=\
|
||||||
|
${javac.classpath}
|
||||||
|
javac.source=1.7
|
||||||
|
javac.target=1.7
|
||||||
|
javac.test.classpath=\
|
||||||
|
${javac.classpath}:\
|
||||||
|
${build.classes.dir}:\
|
||||||
|
${libs.junit_4.classpath}:\
|
||||||
|
${libs.hamcrest.classpath}
|
||||||
|
javac.test.modulepath=\
|
||||||
|
${javac.modulepath}
|
||||||
|
javac.test.processorpath=\
|
||||||
|
${javac.test.classpath}
|
||||||
|
javadoc.additionalparam=
|
||||||
|
javadoc.author=false
|
||||||
|
javadoc.encoding=${source.encoding}
|
||||||
|
javadoc.html5=false
|
||||||
|
javadoc.noindex=false
|
||||||
|
javadoc.nonavbar=false
|
||||||
|
javadoc.notree=false
|
||||||
|
javadoc.private=false
|
||||||
|
javadoc.splitindex=true
|
||||||
|
javadoc.use=true
|
||||||
|
javadoc.version=false
|
||||||
|
javadoc.windowtitle=Tess4J API
|
||||||
|
jlink.launcher=false
|
||||||
|
jlink.launcher.name=Tess4J
|
||||||
|
jnlp.codebase.type=no.codebase
|
||||||
|
jnlp.descriptor=application
|
||||||
|
jnlp.enabled=false
|
||||||
|
jnlp.mixed.code=default
|
||||||
|
jnlp.offline-allowed=false
|
||||||
|
jnlp.signed=false
|
||||||
|
jnlp.signing=
|
||||||
|
jnlp.signing.alias=
|
||||||
|
jnlp.signing.keystore=
|
||||||
|
meta.inf.dir=${src.dir}/META-INF
|
||||||
|
mkdist.disabled=false
|
||||||
|
platform.active=default_platform
|
||||||
|
run.classpath=\
|
||||||
|
${javac.classpath}:\
|
||||||
|
${build.classes.dir}
|
||||||
|
# Space-separated list of JVM arguments used when running the project
|
||||||
|
# (you may also define separate properties like run-sys-prop.name=value instead of -Dname=value
|
||||||
|
# or test-sys-prop.name=value to set system properties for unit tests):
|
||||||
|
run.jvmargs=
|
||||||
|
run.modulepath=\
|
||||||
|
${javac.modulepath}
|
||||||
|
run.test.classpath=\
|
||||||
|
${javac.test.classpath}:\
|
||||||
|
${build.test.classes.dir}
|
||||||
|
run.test.modulepath=\
|
||||||
|
${javac.test.modulepath}
|
||||||
|
source.encoding=UTF-8
|
||||||
|
src.dir=src
|
||||||
|
test.src.dir=test
|
19
NGCC/Tess4J/nbproject/project.xml
Normal file
19
NGCC/Tess4J/nbproject/project.xml
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project xmlns="http://www.netbeans.org/ns/project/1">
|
||||||
|
<type>org.netbeans.modules.java.j2seproject</type>
|
||||||
|
<configuration>
|
||||||
|
<data xmlns="http://www.netbeans.org/ns/j2se-project/3">
|
||||||
|
<name>Tess4J</name>
|
||||||
|
<source-roots>
|
||||||
|
<root id="src.dir"/>
|
||||||
|
</source-roots>
|
||||||
|
<test-roots>
|
||||||
|
<root id="test.src.dir"/>
|
||||||
|
</test-roots>
|
||||||
|
</data>
|
||||||
|
<libraries xmlns="http://www.netbeans.org/ns/ant-project-libraries/1">
|
||||||
|
<definitions>.\lib\nblibraries.properties</definitions>
|
||||||
|
</libraries>
|
||||||
|
<references xmlns="http://www.netbeans.org/ns/ant-project-references/1"/>
|
||||||
|
</configuration>
|
||||||
|
</project>
|
124
NGCC/Tess4J/readme.html
Normal file
124
NGCC/Tess4J/readme.html
Normal file
@ -0,0 +1,124 @@
|
|||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||||||
|
<title>Tess4J - Java Wrapper for Tesseract OCR API</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<div class="Section1">
|
||||||
|
<h2 align="center">
|
||||||
|
Tess4J
|
||||||
|
</h2>
|
||||||
|
<h3>
|
||||||
|
DESCRIPTION
|
||||||
|
</h3>
|
||||||
|
<p>
|
||||||
|
Tess4J is a JNA wrapper for <a href="https://github.com/tesseract-ocr">Tesseract OCR
|
||||||
|
API</a>; it provides character recognition support for common image formats,
|
||||||
|
multi-page images, and PDF documents. The library has been developed and tested
|
||||||
|
on Windows and Linux.
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
Tess4J is released and distributed under the <a href="http://www.apache.org/licenses/LICENSE-2.0.html">
|
||||||
|
Apache License, v2.0</a>. Its official homepage is at <a href="http://tess4j.sourceforge.net/">
|
||||||
|
http://tess4j.sourceforge.net</a>.
|
||||||
|
</p>
|
||||||
|
<h3>
|
||||||
|
SOFTWARE REQUIREMENTS
|
||||||
|
</h3>
|
||||||
|
<p>
|
||||||
|
<a href="http://java.oracle.com/">Java Runtime Environment</a>, <a href="https://github.com/twall/jna">
|
||||||
|
JNA</a>, and <a href="https://java.net/projects/jai-imageio">JAI-ImageIO</a>
|
||||||
|
are required. <a href="http://ant.apache.org/">Apache Ant</a> and <a href="http://www.junit.org/">
|
||||||
|
JUnit</a> are used for program building and unit testing. The Tesseract DLLs
|
||||||
|
were built with VS2015 and therefore depend on the <a href="https://www.microsoft.com/en-us/download/details.aspx?id=53587">
|
||||||
|
Visual C++ 2015 Redistributable Packages</a>.
|
||||||
|
</p>
|
||||||
|
<h3>
|
||||||
|
INSTRUCTIONS
|
||||||
|
</h3>
|
||||||
|
<p>
|
||||||
|
Tesseract 3.05.01 and Leptonica 1.74.4 (via Lept4J) 32- and 64-bit
|
||||||
|
DLLs, language data for English, and sample images are bundled with the library.
|
||||||
|
<a href="https://github.com/tesseract-ocr/tessdata">Language data packs</a> for
|
||||||
|
Tesseract should be decompressed and placed into the <code>tessdata</code> folder.
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
The Linux shared object library (<code>libtesseract.so</code>) equivalent to the
|
||||||
|
DLL is available in Tesseract 3.05.01, which can be built from the <a href="https://github.com/tesseract-ocr/tesseract"
|
||||||
|
target="_blank">source</a> with the instructions given in <a href="https://github.com/tesseract-ocr/tesseract/wiki/Compiling"
|
||||||
|
target="_blank">Tesseract Wiki</a>.
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
To unit test, at the command line, execute:
|
||||||
|
</p>
|
||||||
|
<blockquote>
|
||||||
|
<p>
|
||||||
|
<code>ant test</code>
|
||||||
|
</p>
|
||||||
|
</blockquote>
|
||||||
|
<p>
|
||||||
|
Support for PDF documents is available through either
|
||||||
|
<a href="http://www.ghostscript.com/" target="_blank">GPL Ghostscript</a>, which should be installed and included
|
||||||
|
in system path, or PDFBox, if Ghostscript is not available.
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
Images to be OCRed should be scanned at a resolution from at least 200 DPI (dots per
|
||||||
|
inch) to 400 DPI in monochrome (black&amp;white) or grayscale. Scanning at higher
|
||||||
|
resolutions will not necessarily result in better recognition accuracy. The actual
|
||||||
|
success rates depend greatly on the quality of the scanned image. The typical settings
|
||||||
|
for scanning are 300 DPI and 1 bpp (bits per pixel) black&amp;white or 8 bpp grayscale
|
||||||
|
uncompressed TIFF or PNG format. PNG is usually smaller in size than other image
|
||||||
|
formats and still keeps high quality due to its employing lossless data compression
|
||||||
|
algorithms; TIFF has the advantage of the ability to contain multiple images (pages)
|
||||||
|
in a file.
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
Several built-in functions are also provided for merging several images or PDF files
|
||||||
|
into a single one for convenient OCR operations, or for splitting a PDF file into
|
||||||
|
smaller ones if it is too large, which can cause out-of-memory exceptions.
|
||||||
|
</p>
|
||||||
|
<h3>
|
||||||
|
CODE EXAMPLES
|
||||||
|
</h3>
|
||||||
|
<p>
|
||||||
|
The following code example shows common usage of the library. Make sure <code>tessdata</code>
|
||||||
|
folder is populated with appropriate language data files and the <code>.jar</code>
|
||||||
|
files are in the classpath. On Windows, the DLLs will be automatically extracted
|
||||||
|
from <code>tess4j.jar</code> to the default temporary directory and loaded.
|
||||||
|
</p>
|
||||||
|
<blockquote>
|
||||||
|
<pre>
|
||||||
|
package net.sourceforge.tess4j.example;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import net.sourceforge.tess4j.*;
|
||||||
|
|
||||||
|
public class TesseractExample {
|
||||||
|
public static void main(String[] args) {
|
||||||
|
// ImageIO.scanForPlugins(); // for server environment
|
||||||
|
File imageFile = new File("eurotext.tif");
|
||||||
|
ITesseract instance = new Tesseract(); // JNA Interface Mapping
|
||||||
|
// ITesseract instance = new Tesseract1(); // JNA Direct Mapping
|
||||||
|
// instance.setDatapath("&lt;parentPath&gt;"); // replace &lt;parentPath&gt; with path to parent directory of tessdata
|
||||||
|
// instance.setLanguage("eng");
|
||||||
|
|
||||||
|
try {
|
||||||
|
String result = instance.doOCR(imageFile);
|
||||||
|
System.out.println(result);
|
||||||
|
} catch (TesseractException e) {
|
||||||
|
System.err.println(e.getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
</pre>
|
||||||
|
</blockquote>
|
||||||
|
<h3>
|
||||||
|
DOCUMENTATION
|
||||||
|
</h3>
|
||||||
|
<p>
|
||||||
|
Please visit the website for the library's <a href="http://tess4j.sf.net/docs/">documentation</a>.
|
||||||
|
</p>
|
||||||
|
<hr />
|
||||||
|
</div>
|
||||||
|
</body>
|
||||||
|
</html>
|
@ -0,0 +1,175 @@
|
|||||||
|
/**
|
||||||
|
* <a href="http://www.jdeskew.com/">JDeskew</a>
|
||||||
|
*/
|
||||||
|
package com.recognition.software.jdeskew;
|
||||||
|
|
||||||
|
import java.awt.image.BufferedImage;
|
||||||
|
|
||||||
|
public class ImageDeskew {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Representation of a line in the image.
|
||||||
|
*/
|
||||||
|
public class HoughLine {
|
||||||
|
|
||||||
|
// count of points in the line
|
||||||
|
public int count = 0;
|
||||||
|
// index in matrix.
|
||||||
|
public int index = 0;
|
||||||
|
// the line is represented as all x, y that solve y * cos(alpha) - x *
|
||||||
|
// sin(alpha) = d
|
||||||
|
public double alpha;
|
||||||
|
public double d;
|
||||||
|
}
|
||||||
|
|
||||||
|
// the source image
|
||||||
|
private BufferedImage cImage;
|
||||||
|
// the range of angles to search for lines
|
||||||
|
private double cAlphaStart = -20;
|
||||||
|
private double cAlphaStep = 0.2;
|
||||||
|
private int cSteps = 40 * 5;
|
||||||
|
// pre-calculation of sin and cos
|
||||||
|
private double[] cSinA;
|
||||||
|
private double[] cCosA;
|
||||||
|
// range of d
|
||||||
|
private double cDMin;
|
||||||
|
private double cDStep = 1.0;
|
||||||
|
private int cDCount;
|
||||||
|
// count of points that fit in a line
|
||||||
|
private int[] cHMatrix;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructor.
|
||||||
|
*
|
||||||
|
* @param image
|
||||||
|
*/
|
||||||
|
public ImageDeskew(BufferedImage image) {
|
||||||
|
this.cImage = image;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculates the skew angle of the image cImage.
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public double getSkewAngle() {
|
||||||
|
ImageDeskew.HoughLine[] hl;
|
||||||
|
double sum = 0.0;
|
||||||
|
int count = 0;
|
||||||
|
|
||||||
|
// perform Hough Transformation
|
||||||
|
calc();
|
||||||
|
// top 20 of the detected lines in the image
|
||||||
|
hl = getTop(20);
|
||||||
|
|
||||||
|
if (hl.length >= 20) {
|
||||||
|
// average angle of the lines
|
||||||
|
for (int i = 0; i < 19; i++) {
|
||||||
|
sum += hl[i].alpha;
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
return (sum / count);
|
||||||
|
} else {
|
||||||
|
return 0.0d;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// calculate the count lines in the image with most points
|
||||||
|
private ImageDeskew.HoughLine[] getTop(int count) {
|
||||||
|
|
||||||
|
ImageDeskew.HoughLine[] hl = new ImageDeskew.HoughLine[count];
|
||||||
|
for (int i = 0; i < count; i++) {
|
||||||
|
hl[i] = new ImageDeskew.HoughLine();
|
||||||
|
}
|
||||||
|
|
||||||
|
ImageDeskew.HoughLine tmp;
|
||||||
|
|
||||||
|
for (int i = 0; i < (this.cHMatrix.length - 1); i++) {
|
||||||
|
if (this.cHMatrix[i] > hl[count - 1].count) {
|
||||||
|
hl[count - 1].count = this.cHMatrix[i];
|
||||||
|
hl[count - 1].index = i;
|
||||||
|
int j = count - 1;
|
||||||
|
while ((j > 0) && (hl[j].count > hl[j - 1].count)) {
|
||||||
|
tmp = hl[j];
|
||||||
|
hl[j] = hl[j - 1];
|
||||||
|
hl[j - 1] = tmp;
|
||||||
|
j--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int alphaIndex;
|
||||||
|
int dIndex;
|
||||||
|
|
||||||
|
for (int i = 0; i < count; i++) {
|
||||||
|
dIndex = hl[i].index / cSteps; // integer division, no
|
||||||
|
// remainder
|
||||||
|
alphaIndex = hl[i].index - dIndex * cSteps;
|
||||||
|
hl[i].alpha = getAlpha(alphaIndex);
|
||||||
|
hl[i].d = dIndex + cDMin;
|
||||||
|
}
|
||||||
|
|
||||||
|
return hl;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Hough Transformation
|
||||||
|
private void calc() {
|
||||||
|
int hMin = (int) ((this.cImage.getHeight()) / 4.0);
|
||||||
|
int hMax = (int) ((this.cImage.getHeight()) * 3.0 / 4.0);
|
||||||
|
init();
|
||||||
|
|
||||||
|
for (int y = hMin; y < hMax; y++) {
|
||||||
|
for (int x = 1; x < (this.cImage.getWidth() - 2); x++) {
|
||||||
|
// only lower edges are considered
|
||||||
|
if (ImageUtil.isBlack(this.cImage, x, y)) {
|
||||||
|
if (!ImageUtil.isBlack(this.cImage, x, y + 1)) {
|
||||||
|
calc(x, y);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// calculate all lines through the point (x,y)
|
||||||
|
private void calc(int x, int y) {
|
||||||
|
double d;
|
||||||
|
int dIndex;
|
||||||
|
int index;
|
||||||
|
|
||||||
|
for (int alpha = 0; alpha < (this.cSteps - 1); alpha++) {
|
||||||
|
d = y * this.cCosA[alpha] - x * this.cSinA[alpha];
|
||||||
|
dIndex = (int) (d - this.cDMin);
|
||||||
|
index = dIndex * this.cSteps + alpha;
|
||||||
|
try {
|
||||||
|
this.cHMatrix[index] += 1;
|
||||||
|
} catch (Exception ex) {
|
||||||
|
System.out.println(ex.toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void init() {
|
||||||
|
|
||||||
|
double angle;
|
||||||
|
|
||||||
|
// pre-calculation of sin and cos
|
||||||
|
this.cSinA = new double[this.cSteps - 1];
|
||||||
|
this.cCosA = new double[this.cSteps - 1];
|
||||||
|
|
||||||
|
for (int i = 0; i < (this.cSteps - 1); i++) {
|
||||||
|
angle = getAlpha(i) * Math.PI / 180.0;
|
||||||
|
this.cSinA[i] = Math.sin(angle);
|
||||||
|
this.cCosA[i] = Math.cos(angle);
|
||||||
|
}
|
||||||
|
|
||||||
|
// range of d
|
||||||
|
this.cDMin = -this.cImage.getWidth();
|
||||||
|
this.cDCount = (int) (2.0 * ((this.cImage.getWidth() + this.cImage.getHeight())) / this.cDStep);
|
||||||
|
this.cHMatrix = new int[this.cDCount * this.cSteps];
|
||||||
|
}
|
||||||
|
|
||||||
|
public double getAlpha(int index) {
|
||||||
|
return this.cAlphaStart + (index * this.cAlphaStep);
|
||||||
|
}
|
||||||
|
}
|
132
NGCC/Tess4J/src/com/recognition/software/jdeskew/ImageUtil.java
Normal file
132
NGCC/Tess4J/src/com/recognition/software/jdeskew/ImageUtil.java
Normal file
@ -0,0 +1,132 @@
|
|||||||
|
/**
|
||||||
|
* <a href="http://www.jdeskew.com/">JDeskew</a>
|
||||||
|
*/
|
||||||
|
package com.recognition.software.jdeskew;
|
||||||
|
|
||||||
|
import java.awt.Color;
|
||||||
|
import java.awt.Graphics2D;
|
||||||
|
import java.awt.RenderingHints;
|
||||||
|
import java.awt.geom.AffineTransform;
|
||||||
|
import java.awt.image.BufferedImage;
|
||||||
|
import java.awt.image.WritableRaster;
|
||||||
|
|
||||||
|
public class ImageUtil {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Whether the pixel is black.
|
||||||
|
*
|
||||||
|
* @param image source image
|
||||||
|
* @param x
|
||||||
|
* @param y
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public static boolean isBlack(BufferedImage image, int x, int y) {
|
||||||
|
if (image.getType() == BufferedImage.TYPE_BYTE_BINARY) {
|
||||||
|
WritableRaster raster = image.getRaster();
|
||||||
|
int pixelRGBValue = raster.getSample(x, y, 0);
|
||||||
|
return pixelRGBValue == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int luminanceValue = 140;
|
||||||
|
return isBlack(image, x, y, luminanceValue);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Whether the pixel is black.
|
||||||
|
*
|
||||||
|
* @param image source image
|
||||||
|
* @param x
|
||||||
|
* @param y
|
||||||
|
* @param luminanceCutOff
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public static boolean isBlack(BufferedImage image, int x, int y, int luminanceCutOff) {
|
||||||
|
int pixelRGBValue;
|
||||||
|
int r;
|
||||||
|
int g;
|
||||||
|
int b;
|
||||||
|
double luminance = 0.0;
|
||||||
|
|
||||||
|
// return white on areas outside of image boundaries
|
||||||
|
if (x < 0 || y < 0 || x > image.getWidth() || y > image.getHeight()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
pixelRGBValue = image.getRGB(x, y);
|
||||||
|
r = (pixelRGBValue >> 16) & 0xff;
|
||||||
|
g = (pixelRGBValue >> 8) & 0xff;
|
||||||
|
b = (pixelRGBValue) & 0xff;
|
||||||
|
luminance = (r * 0.299) + (g * 0.587) + (b * 0.114);
|
||||||
|
} catch (Exception e) {
|
||||||
|
// ignore.
|
||||||
|
}
|
||||||
|
|
||||||
|
return luminance < luminanceCutOff;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Rotates image.
|
||||||
|
*
|
||||||
|
* @param image source image
|
||||||
|
* @param angle by degrees
|
||||||
|
* @param cx x-coordinate of pivot point
|
||||||
|
* @param cy y-coordinate of pivot point
|
||||||
|
* @return rotated image
|
||||||
|
*/
|
||||||
|
public static BufferedImage rotate(BufferedImage image, double angle, int cx, int cy) {
|
||||||
|
int width = image.getWidth(null);
|
||||||
|
int height = image.getHeight(null);
|
||||||
|
|
||||||
|
int minX, minY, maxX, maxY;
|
||||||
|
minX = minY = maxX = maxY = 0;
|
||||||
|
|
||||||
|
int[] corners = {0, 0, width, 0, width, height, 0, height};
|
||||||
|
|
||||||
|
double theta = Math.toRadians(angle);
|
||||||
|
for (int i = 0; i < corners.length; i += 2) {
|
||||||
|
int x = (int) (Math.cos(theta) * (corners[i] - cx)
|
||||||
|
- Math.sin(theta) * (corners[i + 1] - cy) + cx);
|
||||||
|
int y = (int) (Math.sin(theta) * (corners[i] - cx)
|
||||||
|
+ Math.cos(theta) * (corners[i + 1] - cy) + cy);
|
||||||
|
|
||||||
|
if (x > maxX) {
|
||||||
|
maxX = x;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (x < minX) {
|
||||||
|
minX = x;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (y > maxY) {
|
||||||
|
maxY = y;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (y < minY) {
|
||||||
|
minY = y;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
cx = (cx - minX);
|
||||||
|
cy = (cy - minY);
|
||||||
|
|
||||||
|
BufferedImage bi = new BufferedImage((maxX - minX), (maxY - minY),
|
||||||
|
image.getType());
|
||||||
|
Graphics2D g2 = bi.createGraphics();
|
||||||
|
g2.setRenderingHint(RenderingHints.KEY_INTERPOLATION,
|
||||||
|
RenderingHints.VALUE_INTERPOLATION_BICUBIC);
|
||||||
|
|
||||||
|
g2.setBackground(Color.white);
|
||||||
|
g2.fillRect(0, 0, bi.getWidth(), bi.getHeight());
|
||||||
|
|
||||||
|
AffineTransform at = new AffineTransform();
|
||||||
|
at.rotate(theta, cx, cy);
|
||||||
|
|
||||||
|
g2.setTransform(at);
|
||||||
|
g2.drawImage(image, -minX, -minY, null);
|
||||||
|
g2.dispose();
|
||||||
|
|
||||||
|
return bi;
|
||||||
|
}
|
||||||
|
}
|
617
NGCC/Tess4J/src/net/sourceforge/tess4j/ITessAPI.java
Normal file
617
NGCC/Tess4J/src/net/sourceforge/tess4j/ITessAPI.java
Normal file
@ -0,0 +1,617 @@
|
|||||||
|
/**
|
||||||
|
* Copyright @ 2014 Quan Nguyen
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||||
|
* use this file except in compliance with the License. You may obtain a copy of
|
||||||
|
* the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations under
|
||||||
|
* the License.
|
||||||
|
*/
|
||||||
|
package net.sourceforge.tess4j;
|
||||||
|
|
||||||
|
import com.sun.jna.Callback;
|
||||||
|
import com.sun.jna.NativeLong;
|
||||||
|
import com.sun.jna.Pointer;
|
||||||
|
import com.sun.jna.PointerType;
|
||||||
|
import com.sun.jna.Structure;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An interface that holds the classes and constants common to the TessAPI bindings.
|
||||||
|
*/
|
||||||
|
public interface ITessAPI {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* When Tesseract/Cube is initialized we can choose to instantiate/load/run
|
||||||
|
* only the Tesseract part, only the Cube part or both along with the
|
||||||
|
* combiner. The preference of which engine to use is stored in
|
||||||
|
* <code>tessedit_ocr_engine_mode</code>.<br>
|
||||||
|
* <br>
|
||||||
|
* ATTENTION: When modifying this enum, please make sure to make the
|
||||||
|
* appropriate changes to all the enums mirroring it (e.g. OCREngine in
|
||||||
|
* cityblock/workflow/detection/detection_storage.proto). Such enums will
|
||||||
|
* mention the connection to OcrEngineMode in the comments.
|
||||||
|
*/
|
||||||
|
public static interface TessOcrEngineMode {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Run Tesseract only - fastest
|
||||||
|
*/
|
||||||
|
public static final int OEM_TESSERACT_ONLY = 0;
|
||||||
|
/**
|
||||||
|
* Run Cube only - better accuracy, but slower
|
||||||
|
*/
|
||||||
|
public static final int OEM_CUBE_ONLY = 1;
|
||||||
|
/**
|
||||||
|
* Run both and combine results - best accuracy
|
||||||
|
*/
|
||||||
|
public static final int OEM_TESSERACT_CUBE_COMBINED = 2;
|
||||||
|
/**
|
||||||
|
* Specify this mode when calling <code>init_*()</code>, to indicate
|
||||||
|
* that any of the above modes should be automatically inferred from the
|
||||||
|
* variables in the language-specific config, command-line configs, or
|
||||||
|
* if not specified in any of the above should be set to the default
|
||||||
|
* <code>OEM_TESSERACT_ONLY</code>.
|
||||||
|
*/
|
||||||
|
public static final int OEM_DEFAULT = 3;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Possible modes for page layout analysis. These *must* be kept in order of
|
||||||
|
* decreasing amount of layout analysis to be done, except for
|
||||||
|
* <code>OSD_ONLY</code>, so that the inequality test macros below work.
|
||||||
|
*/
|
||||||
|
public static interface TessPageSegMode {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Orientation and script detection only.
|
||||||
|
*/
|
||||||
|
public static final int PSM_OSD_ONLY = 0;
|
||||||
|
/**
|
||||||
|
* Automatic page segmentation with orientation and script detection.
|
||||||
|
* (OSD)
|
||||||
|
*/
|
||||||
|
public static final int PSM_AUTO_OSD = 1;
|
||||||
|
/**
|
||||||
|
* Automatic page segmentation, but no OSD, or OCR.
|
||||||
|
*/
|
||||||
|
public static final int PSM_AUTO_ONLY = 2;
|
||||||
|
/**
|
||||||
|
* Fully automatic page segmentation, but no OSD.
|
||||||
|
*/
|
||||||
|
public static final int PSM_AUTO = 3;
|
||||||
|
/**
|
||||||
|
* Assume a single column of text of variable sizes.
|
||||||
|
*/
|
||||||
|
public static final int PSM_SINGLE_COLUMN = 4;
|
||||||
|
/**
|
||||||
|
* Assume a single uniform block of vertically aligned text.
|
||||||
|
*/
|
||||||
|
public static final int PSM_SINGLE_BLOCK_VERT_TEXT = 5;
|
||||||
|
/**
|
||||||
|
* Assume a single uniform block of text.
|
||||||
|
*/
|
||||||
|
public static final int PSM_SINGLE_BLOCK = 6;
|
||||||
|
/**
|
||||||
|
* Treat the image as a single text line.
|
||||||
|
*/
|
||||||
|
public static final int PSM_SINGLE_LINE = 7;
|
||||||
|
/**
|
||||||
|
* Treat the image as a single word.
|
||||||
|
*/
|
||||||
|
public static final int PSM_SINGLE_WORD = 8;
|
||||||
|
/**
|
||||||
|
* Treat the image as a single word in a circle.
|
||||||
|
*/
|
||||||
|
public static final int PSM_CIRCLE_WORD = 9;
|
||||||
|
/**
|
||||||
|
* Treat the image as a single character.
|
||||||
|
*/
|
||||||
|
public static final int PSM_SINGLE_CHAR = 10;
|
||||||
|
/**
|
||||||
|
* Find as much text as possible in no particular order.
|
||||||
|
*/
|
||||||
|
public static final int PSM_SPARSE_TEXT = 11;
|
||||||
|
/**
|
||||||
|
* Sparse text with orientation and script detection.
|
||||||
|
*/
|
||||||
|
public static final int PSM_SPARSE_TEXT_OSD = 12;
|
||||||
|
/**
|
||||||
|
* Number of enum entries.
|
||||||
|
*/
|
||||||
|
public static final int PSM_COUNT = 13;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Enum of the elements of the page hierarchy, used in
|
||||||
|
* <code>ResultIterator</code> to provide functions that operate on each
|
||||||
|
* level without having to have 5x as many functions.
|
||||||
|
*/
|
||||||
|
public static interface TessPageIteratorLevel {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Block of text/image/separator line.
|
||||||
|
*/
|
||||||
|
public static final int RIL_BLOCK = 0;
|
||||||
|
/**
|
||||||
|
* Paragraph within a block.
|
||||||
|
*/
|
||||||
|
public static final int RIL_PARA = 1;
|
||||||
|
/**
|
||||||
|
* Line within a paragraph.
|
||||||
|
*/
|
||||||
|
public static final int RIL_TEXTLINE = 2;
|
||||||
|
/**
|
||||||
|
* Word within a textline.
|
||||||
|
*/
|
||||||
|
public static final int RIL_WORD = 3;
|
||||||
|
/**
|
||||||
|
* Symbol/character within a word.
|
||||||
|
*/
|
||||||
|
public static final int RIL_SYMBOL = 4;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Possible types for a POLY_BLOCK or ColPartition. Must be kept in sync
|
||||||
|
* with <code>kPBColors</code> in polyblk.cpp and <code>PTIs*Type</code>
|
||||||
|
* functions below, as well as <code>kPolyBlockNames</code> in
|
||||||
|
* publictypes.cpp. Used extensively by ColPartition, and POLY_BLOCK.
|
||||||
|
*/
|
||||||
|
public static interface TessPolyBlockType {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Type is not yet known. Keep as the first element.
|
||||||
|
*/
|
||||||
|
public static final int PT_UNKNOWN = 0;
|
||||||
|
/**
|
||||||
|
* Text that lives inside a column.
|
||||||
|
*/
|
||||||
|
public static final int PT_FLOWING_TEXT = 1;
|
||||||
|
/**
|
||||||
|
* Text that spans more than one column.
|
||||||
|
*/
|
||||||
|
public static final int PT_HEADING_TEXT = 2;
|
||||||
|
/**
|
||||||
|
* Text that is in a cross-column pull-out region.
|
||||||
|
*/
|
||||||
|
public static final int PT_PULLOUT_TEXT = 3;
|
||||||
|
/**
|
||||||
|
* Partition belonging to an equation region.
|
||||||
|
*/
|
||||||
|
public static final int PT_EQUATION = 4;
|
||||||
|
/**
|
||||||
|
* Partition has inline equation.
|
||||||
|
*/
|
||||||
|
public static final int PT_INLINE_EQUATION = 5;
|
||||||
|
/**
|
||||||
|
* Partition belonging to a table region.
|
||||||
|
*/
|
||||||
|
public static final int PT_TABLE = 6;
|
||||||
|
/**
|
||||||
|
* Text-line runs vertically.
|
||||||
|
*/
|
||||||
|
public static final int PT_VERTICAL_TEXT = 7;
|
||||||
|
/**
|
||||||
|
* Text that belongs to an image.
|
||||||
|
*/
|
||||||
|
public static final int PT_CAPTION_TEXT = 8;
|
||||||
|
/**
|
||||||
|
* Image that lives inside a column.
|
||||||
|
*/
|
||||||
|
public static final int PT_FLOWING_IMAGE = 9;
|
||||||
|
/**
|
||||||
|
* Image that spans more than one column.
|
||||||
|
*/
|
||||||
|
public static final int PT_HEADING_IMAGE = 10;
|
||||||
|
/**
|
||||||
|
* Image that is in a cross-column pull-out region.
|
||||||
|
*/
|
||||||
|
public static final int PT_PULLOUT_IMAGE = 11;
|
||||||
|
/**
|
||||||
|
* Horizontal Line.
|
||||||
|
*/
|
||||||
|
public static final int PT_HORZ_LINE = 12;
|
||||||
|
/**
|
||||||
|
* Vertical Line.
|
||||||
|
*/
|
||||||
|
public static final int PT_VERT_LINE = 13;
|
||||||
|
/**
|
||||||
|
* Lies outside of any column.
|
||||||
|
*/
|
||||||
|
public static final int PT_NOISE = 14;
|
||||||
|
/**
|
||||||
|
* Number of enum entries.
|
||||||
|
*/
|
||||||
|
public static final int PT_COUNT = 15;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* NOTA BENE: Fully justified paragraphs (text aligned to both left and
|
||||||
|
* right margins) are marked by Tesseract with JUSTIFICATION_LEFT if their
|
||||||
|
* text is written with a left-to-right script and with JUSTIFICATION_RIGHT
|
||||||
|
* if their text is written in a right-to-left script.<br>
|
||||||
|
* <br>
|
||||||
|
* Interpretation for text read in vertical lines: "Left" is wherever the
|
||||||
|
* starting reading position is.
|
||||||
|
*/
|
||||||
|
public static interface TessParagraphJustification {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The alignment is not clearly one of the other options. This could
|
||||||
|
* happen for example if there are only one or two lines of text or the
|
||||||
|
* text looks like source code or poetry.
|
||||||
|
*/
|
||||||
|
public static final int JUSTIFICATION_UNKNOWN = 0;
|
||||||
|
/**
|
||||||
|
* Each line, except possibly the first, is flush to the same left tab
|
||||||
|
* stop.
|
||||||
|
*/
|
||||||
|
public static final int JUSTIFICATION_LEFT = 1;
|
||||||
|
/**
|
||||||
|
* The text lines of the paragraph are centered about a line going down
|
||||||
|
* through their middle of the text lines.
|
||||||
|
*/
|
||||||
|
public static final int JUSTIFICATION_CENTER = 2;
|
||||||
|
/**
|
||||||
|
* Each line, except possibly the first, is flush to the same right tab
|
||||||
|
* stop.
|
||||||
|
*/
|
||||||
|
public static final int JUSTIFICATION_RIGHT = 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* <pre>
|
||||||
|
* +------------------+
|
||||||
|
* | 1 Aaaa Aaaa Aaaa |
|
||||||
|
* | Aaa aa aaa aa |
|
||||||
|
* | aaaaaa A aa aaa. |
|
||||||
|
* | 2 |
|
||||||
|
* | ####### c c C |
|
||||||
|
* | ####### c c c |
|
||||||
|
* | < ####### c c c |
|
||||||
|
* | < ####### c c |
|
||||||
|
* | < ####### . c |
|
||||||
|
* | 3 ####### c |
|
||||||
|
* +------------------+
|
||||||
|
* </pre> Orientation Example:
|
||||||
|
* <br>
|
||||||
|
* ====================
|
||||||
|
* <br>
|
||||||
|
* Above is a diagram of some (1) English and (2) Chinese text and a (3)
|
||||||
|
* photo credit.<br>
|
||||||
|
* <br>
|
||||||
|
* Upright Latin characters are represented as A and a. '<' represents a
|
||||||
|
* latin character rotated anti-clockwise 90 degrees. Upright Chinese
|
||||||
|
* characters are represented C and c.<br>
|
||||||
|
* <br> NOTA BENE: enum values here should match goodoc.proto<br>
|
||||||
|
* <br> If you orient your head so that "up" aligns with Orientation, then
|
||||||
|
* the characters will appear "right side up" and readable.<br>
|
||||||
|
* <br>
|
||||||
|
* In the example above, both the English and Chinese paragraphs are
|
||||||
|
* oriented so their "up" is the top of the page (page up). The photo credit
|
||||||
|
* is read with one's head turned leftward ("up" is to page left).<br>
|
||||||
|
* <br>
|
||||||
|
* The values of this enum match the convention of Tesseract's osdetect.h
|
||||||
|
*/
|
||||||
|
public static interface TessOrientation {
|
||||||
|
|
||||||
|
public static final int ORIENTATION_PAGE_UP = 0;
|
||||||
|
public static final int ORIENTATION_PAGE_RIGHT = 1;
|
||||||
|
public static final int ORIENTATION_PAGE_DOWN = 2;
|
||||||
|
public static final int ORIENTATION_PAGE_LEFT = 3;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The grapheme clusters within a line of text are laid out logically in
|
||||||
|
* this direction, judged when looking at the text line rotated so that its
|
||||||
|
* Orientation is "page up".<br>
|
||||||
|
* <br>
|
||||||
|
* For English text, the writing direction is left-to-right. For the Chinese
|
||||||
|
* text in the above example, the writing direction is top-to-bottom.
|
||||||
|
*/
|
||||||
|
public static interface TessWritingDirection {
|
||||||
|
|
||||||
|
public static final int WRITING_DIRECTION_LEFT_TO_RIGHT = 0;
|
||||||
|
public static final int WRITING_DIRECTION_RIGHT_TO_LEFT = 1;
|
||||||
|
public static final int WRITING_DIRECTION_TOP_TO_BOTTOM = 2;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The text lines are read in the given sequence.<br>
|
||||||
|
* <br>
|
||||||
|
* In English, the order is top-to-bottom. In Chinese, vertical text lines
|
||||||
|
* are read right-to-left. Mongolian is written in vertical columns top to
|
||||||
|
* bottom like Chinese, but the lines order left-to right.<br>
|
||||||
|
* <br>
|
||||||
|
* Note that only some combinations make sense. For example,
|
||||||
|
* <code>WRITING_DIRECTION_LEFT_TO_RIGHT</code> implies
|
||||||
|
* <code>TEXTLINE_ORDER_TOP_TO_BOTTOM</code>.
|
||||||
|
*/
|
||||||
|
public static interface TessTextlineOrder {
|
||||||
|
|
||||||
|
public static final int TEXTLINE_ORDER_LEFT_TO_RIGHT = 0;
|
||||||
|
public static final int TEXTLINE_ORDER_RIGHT_TO_LEFT = 1;
|
||||||
|
public static final int TEXTLINE_ORDER_TOP_TO_BOTTOM = 2;
|
||||||
|
};
|
||||||
|
|
||||||
|
public static final int TRUE = 1;
|
||||||
|
public static final int FALSE = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Base class for all tesseract APIs. Specific classes can add ability to
|
||||||
|
* work on different inputs or produce different outputs. This class is
|
||||||
|
* mostly an interface layer on top of the Tesseract instance class to hide
|
||||||
|
* the data types so that users of this class don't have to include any
|
||||||
|
* other Tesseract headers.
|
||||||
|
*/
|
||||||
|
public static class TessBaseAPI extends PointerType {
|
||||||
|
|
||||||
|
public TessBaseAPI(Pointer address) {
|
||||||
|
super(address);
|
||||||
|
}
|
||||||
|
|
||||||
|
public TessBaseAPI() {
|
||||||
|
super();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Class to iterate over tesseract page structure, providing access to all
|
||||||
|
* levels of the page hierarchy, without including any tesseract headers or
|
||||||
|
* having to handle any tesseract structures.<br>
|
||||||
|
* WARNING! This class points to data held within the TessBaseAPI class, and
|
||||||
|
* therefore can only be used while the TessBaseAPI class still exists and
|
||||||
|
* has not been subjected to a call of <code>Init</code>,
|
||||||
|
* <code>SetImage</code>, <code>Recognize</code>, <code>Clear</code>,
|
||||||
|
* <code>End</code> <code>DetectOS</code>, or anything else that changes the
|
||||||
|
* internal <code>PAGE_RES</code>. See <code>apitypes.h</code> for the
|
||||||
|
* definition of <code>PageIteratorLevel</code>. See also
|
||||||
|
* <code>ResultIterator</code>, derived from <code>PageIterator</code>,
|
||||||
|
* which adds in the ability to access OCR output with text-specific
|
||||||
|
* methods.
|
||||||
|
*/
|
||||||
|
public static class TessPageIterator extends PointerType {
|
||||||
|
|
||||||
|
public TessPageIterator(Pointer address) {
|
||||||
|
super(address);
|
||||||
|
}
|
||||||
|
|
||||||
|
public TessPageIterator() {
|
||||||
|
super();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* MutableIterator adds access to internal data structures.
|
||||||
|
*/
|
||||||
|
public static class TessMutableIterator extends PointerType {
|
||||||
|
|
||||||
|
public TessMutableIterator(Pointer address) {
|
||||||
|
super(address);
|
||||||
|
}
|
||||||
|
|
||||||
|
public TessMutableIterator() {
|
||||||
|
super();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Iterator for tesseract results that is capable of iterating in proper
|
||||||
|
* reading order over Bi Directional (e.g. mixed Hebrew and English) text.
|
||||||
|
* ResultIterator adds text-specific methods for access to OCR output.
|
||||||
|
*/
|
||||||
|
public static class TessResultIterator extends PointerType {
|
||||||
|
|
||||||
|
public TessResultIterator(Pointer address) {
|
||||||
|
super(address);
|
||||||
|
}
|
||||||
|
|
||||||
|
public TessResultIterator() {
|
||||||
|
super();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
public static class TessChoiceIterator extends PointerType {
|
||||||
|
|
||||||
|
public TessChoiceIterator(Pointer address) {
|
||||||
|
super(address);
|
||||||
|
}
|
||||||
|
|
||||||
|
public TessChoiceIterator() {
|
||||||
|
super();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Interface for rendering tesseract results into a document, such as text,
|
||||||
|
* HOCR or pdf. This class is abstract. Specific classes handle individual
|
||||||
|
* formats. This interface is then used to inject the renderer class into
|
||||||
|
* tesseract when processing images.
|
||||||
|
*
|
||||||
|
* For simplicity implementing this with tesseract version 3.01, the
|
||||||
|
* renderer contains document state that is cleared from document to
|
||||||
|
* document just as the TessBaseAPI is. This way the base API can just
|
||||||
|
* delegate its rendering functionality to injected renderers, and the
|
||||||
|
* renderers can manage the associated state needed for the specific formats
|
||||||
|
* in addition to the heuristics for producing it.
|
||||||
|
*/
|
||||||
|
public static class TessResultRenderer extends PointerType {
|
||||||
|
|
||||||
|
public TessResultRenderer(Pointer address) {
|
||||||
|
super(address);
|
||||||
|
}
|
||||||
|
|
||||||
|
public TessResultRenderer() {
|
||||||
|
super();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Description of the output of the OCR engine. This structure is used as
|
||||||
|
* both a progress monitor and the final output header, since it needs to be
|
||||||
|
* a valid progress monitor while the OCR engine is storing its output to
|
||||||
|
* shared memory. During progress, all the buffer info is -1. Progress
|
||||||
|
* starts at 0 and increases to 100 during OCR. No other constraint. Every
|
||||||
|
* progress callback, the OCR engine must set <code>ocr_alive</code> to 1.
|
||||||
|
* The HP side will set <code>ocr_alive</code> to 0. Repeated failure to
|
||||||
|
* reset to 1 indicates that the OCR engine is dead. If the cancel function
|
||||||
|
* is not null then it is called with the number of user words found. If it
|
||||||
|
* returns true then operation is cancelled.
|
||||||
|
*/
|
||||||
|
public static class ETEXT_DESC extends Structure {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* chars in this buffer(0). Total number of UTF-8 bytes for this run.
|
||||||
|
*/
|
||||||
|
public short count;
|
||||||
|
/**
|
||||||
|
* percent complete increasing (0-100)
|
||||||
|
*/
|
||||||
|
public short progress;
|
||||||
|
/**
|
||||||
|
* true if not last
|
||||||
|
*/
|
||||||
|
public byte more_to_come;
|
||||||
|
/**
|
||||||
|
* ocr sets to 1, HP 0
|
||||||
|
*/
|
||||||
|
public byte ocr_alive;
|
||||||
|
/**
|
||||||
|
* for errcode use
|
||||||
|
*/
|
||||||
|
public byte err_code;
|
||||||
|
/**
|
||||||
|
* returns true to cancel
|
||||||
|
*/
|
||||||
|
public CANCEL_FUNC cancel;
|
||||||
|
/**
|
||||||
|
* this or other data for cancel
|
||||||
|
*/
|
||||||
|
public Pointer cancel_this;
|
||||||
|
/**
|
||||||
|
* time to stop if not 0
|
||||||
|
*/
|
||||||
|
public TimeVal end_time;
|
||||||
|
/**
|
||||||
|
* character data
|
||||||
|
*/
|
||||||
|
public EANYCODE_CHAR[] text = new EANYCODE_CHAR[1];
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets Field Order.
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
protected List getFieldOrder() {
|
||||||
|
return Arrays.asList("count", "progress", "more_to_come", "ocr_alive", "err_code", "cancel", "cancel_this", "end_time", "text");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* It should be noted that the format for char_code for version 2.0 and
|
||||||
|
* beyond is UTF-8, which means that ASCII characters will come out as one
|
||||||
|
* structure but other characters will be returned in two or more instances
|
||||||
|
* of this structure with a single byte of the UTF-8 code in each, but each
|
||||||
|
* will have the same bounding box.<br>
|
||||||
|
* <br>
|
||||||
|
* Programs which want to handle languages with different characters sets
|
||||||
|
* will need to handle extended characters appropriately, but
|
||||||
|
* <strong>all</strong>
|
||||||
|
* code needs to be prepared to receive UTF-8 coded characters for
|
||||||
|
* characters such as bullet and fancy quotes.
|
||||||
|
*/
|
||||||
|
public static class EANYCODE_CHAR extends Structure {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* character itself, one single UTF-8 byte long. A Unicode character may
|
||||||
|
* consist of one or more UTF-8 bytes. Bytes of a character will have
|
||||||
|
* the same bounding box.
|
||||||
|
*/
|
||||||
|
public byte char_code;
|
||||||
|
/**
|
||||||
|
* left of char (-1)
|
||||||
|
*/
|
||||||
|
public short left;
|
||||||
|
/**
|
||||||
|
* right of char (-1)
|
||||||
|
*/
|
||||||
|
public short right;
|
||||||
|
/**
|
||||||
|
* top of char (-1)
|
||||||
|
*/
|
||||||
|
public short top;
|
||||||
|
/**
|
||||||
|
* bottom of char (-1)
|
||||||
|
*/
|
||||||
|
public short bottom;
|
||||||
|
/**
|
||||||
|
* what font (0)
|
||||||
|
*/
|
||||||
|
public short font_index;
|
||||||
|
/**
|
||||||
|
* classification confidence: 0=perfect, 100=reject (0/100)
|
||||||
|
*/
|
||||||
|
public byte confidence;
|
||||||
|
/**
|
||||||
|
* point size of char, 72 = 1 inch, (10)
|
||||||
|
*/
|
||||||
|
public byte point_size;
|
||||||
|
/**
|
||||||
|
* number of spaces before this char (1)
|
||||||
|
*/
|
||||||
|
public byte blanks;
|
||||||
|
/**
|
||||||
|
* char formatting (0)
|
||||||
|
*/
|
||||||
|
public byte formatting;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets Field Order.
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
protected List getFieldOrder() {
|
||||||
|
return Arrays.asList("char_code", "left", "right", "top", "bottom", "font_index", "confidence", "point_size", "blanks", "formatting");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Callback for <code>cancel_func</code>.
|
||||||
|
*/
|
||||||
|
interface CANCEL_FUNC extends Callback {
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @param cancel_this
|
||||||
|
* @param words
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
boolean invoke(Pointer cancel_this, int words);
|
||||||
|
};
|
||||||
|
|
||||||
|
public static class TimeVal extends Structure {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* seconds
|
||||||
|
*/
|
||||||
|
public NativeLong tv_sec;
|
||||||
|
/**
|
||||||
|
* microseconds
|
||||||
|
*/
|
||||||
|
public NativeLong tv_usec;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected List<String> getFieldOrder() {
|
||||||
|
return Arrays.asList("tv_sec", "tv_usec");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
236
NGCC/Tess4J/src/net/sourceforge/tess4j/ITesseract.java
Normal file
236
NGCC/Tess4J/src/net/sourceforge/tess4j/ITesseract.java
Normal file
@ -0,0 +1,236 @@
|
|||||||
|
/**
|
||||||
|
* Copyright @ 2014 Quan Nguyen
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||||
|
* use this file except in compliance with the License. You may obtain a copy of
|
||||||
|
* the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations under
|
||||||
|
* the License.
|
||||||
|
*/
|
||||||
|
package net.sourceforge.tess4j;
|
||||||
|
|
||||||
|
import java.awt.Rectangle;
|
||||||
|
import java.awt.image.BufferedImage;
|
||||||
|
import java.io.File;
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
import java.util.List;
|
||||||
|
import javax.imageio.IIOImage;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An interface that declares the common OCR methods.
|
||||||
|
*/
|
||||||
|
public interface ITesseract {
|
||||||
|
|
||||||
|
String htmlBeginTag = "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\""
|
||||||
|
+ " \"http://www.w3.org/TR/html4/loose.dtd\">\n"
|
||||||
|
+ "<html>\n<head>\n<title></title>\n"
|
||||||
|
+ "<meta http-equiv=\"Content-Type\" content=\"text/html;"
|
||||||
|
+ "charset=utf-8\" />\n<meta name='ocr-system' content='tesseract'/>\n"
|
||||||
|
+ "</head>\n<body>\n";
|
||||||
|
String htmlEndTag = "</body>\n</html>\n";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Rendered formats supported by Tesseract.
|
||||||
|
*/
|
||||||
|
public enum RenderedFormat {
|
||||||
|
|
||||||
|
TEXT, HOCR, PDF, UNLV, BOX
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs OCR operation.
|
||||||
|
*
|
||||||
|
* @param imageFile an image file
|
||||||
|
* @return the recognized text
|
||||||
|
* @throws TesseractException
|
||||||
|
*/
|
||||||
|
String doOCR(File imageFile) throws TesseractException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs OCR operation.
|
||||||
|
*
|
||||||
|
* @param imageFile an image file
|
||||||
|
* @param rect the bounding rectangle defines the region of the image to be
|
||||||
|
* recognized. A rectangle of zero dimension or <code>null</code> indicates
|
||||||
|
* the whole image.
|
||||||
|
* @return the recognized text
|
||||||
|
* @throws TesseractException
|
||||||
|
*/
|
||||||
|
String doOCR(File imageFile, Rectangle rect) throws TesseractException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs OCR operation.
|
||||||
|
*
|
||||||
|
* @param bi a buffered image
|
||||||
|
* @return the recognized text
|
||||||
|
* @throws TesseractException
|
||||||
|
*/
|
||||||
|
String doOCR(BufferedImage bi) throws TesseractException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs OCR operation.
|
||||||
|
*
|
||||||
|
* @param bi a buffered image
|
||||||
|
* @param rect the bounding rectangle defines the region of the image to be
|
||||||
|
* recognized. A rectangle of zero dimension or <code>null</code> indicates
|
||||||
|
* the whole image.
|
||||||
|
* @return the recognized text
|
||||||
|
* @throws TesseractException
|
||||||
|
*/
|
||||||
|
String doOCR(BufferedImage bi, Rectangle rect) throws TesseractException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs OCR operation.
|
||||||
|
*
|
||||||
|
* @param imageList a list of <code>IIOImage</code> objects
|
||||||
|
* @param rect the bounding rectangle defines the region of the image to be
|
||||||
|
* recognized. A rectangle of zero dimension or <code>null</code> indicates
|
||||||
|
* the whole image.
|
||||||
|
* @return the recognized text
|
||||||
|
* @throws TesseractException
|
||||||
|
*/
|
||||||
|
String doOCR(List<IIOImage> imageList, Rectangle rect) throws TesseractException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs OCR operation.
|
||||||
|
*
|
||||||
|
* @param imageList a list of <code>IIOImage</code> objects
|
||||||
|
* @param filename input file name. Needed only for training and reading a
|
||||||
|
* UNLV zone file.
|
||||||
|
* @param rect the bounding rectangle defines the region of the image to be
|
||||||
|
* recognized. A rectangle of zero dimension or <code>null</code> indicates
|
||||||
|
* the whole image.
|
||||||
|
* @return the recognized text
|
||||||
|
* @throws TesseractException
|
||||||
|
*/
|
||||||
|
String doOCR(List<IIOImage> imageList, String filename, Rectangle rect) throws TesseractException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs OCR operation. Use <code>SetImage</code>, (optionally)
|
||||||
|
* <code>SetRectangle</code>, and one or more of the <code>Get*Text</code>
|
||||||
|
* functions.
|
||||||
|
*
|
||||||
|
* @param xsize width of image
|
||||||
|
* @param ysize height of image
|
||||||
|
* @param buf pixel data
|
||||||
|
* @param rect the bounding rectangle defines the region of the image to be
|
||||||
|
* recognized. A rectangle of zero dimension or <code>null</code> indicates
|
||||||
|
* the whole image.
|
||||||
|
* @param bpp bits per pixel, represents the bit depth of the image, with 1
|
||||||
|
* for binary bitmap, 8 for gray, and 24 for color RGB.
|
||||||
|
* @return the recognized text
|
||||||
|
* @throws TesseractException
|
||||||
|
*/
|
||||||
|
String doOCR(int xsize, int ysize, ByteBuffer buf, Rectangle rect, int bpp) throws TesseractException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs OCR operation. Use <code>SetImage</code>, (optionally)
|
||||||
|
* <code>SetRectangle</code>, and one or more of the <code>Get*Text</code>
|
||||||
|
* functions.
|
||||||
|
*
|
||||||
|
* @param xsize width of image
|
||||||
|
* @param ysize height of image
|
||||||
|
* @param buf pixel data
|
||||||
|
* @param filename input file name. Needed only for training and reading a
|
||||||
|
* UNLV zone file.
|
||||||
|
* @param rect the bounding rectangle defines the region of the image to be
|
||||||
|
* recognized. A rectangle of zero dimension or <code>null</code> indicates
|
||||||
|
* the whole image.
|
||||||
|
* @param bpp bits per pixel, represents the bit depth of the image, with 1
|
||||||
|
* for binary bitmap, 8 for gray, and 24 for color RGB.
|
||||||
|
* @return the recognized text
|
||||||
|
* @throws TesseractException
|
||||||
|
*/
|
||||||
|
String doOCR(int xsize, int ysize, ByteBuffer buf, String filename, Rectangle rect, int bpp) throws TesseractException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets tessdata path.
|
||||||
|
*
|
||||||
|
* @param datapath the tessdata path to set
|
||||||
|
*/
|
||||||
|
void setDatapath(String datapath);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets language for OCR.
|
||||||
|
*
|
||||||
|
* @param language the language code, which follows ISO 639-3 standard.
|
||||||
|
*/
|
||||||
|
void setLanguage(String language);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets OCR engine mode.
|
||||||
|
*
|
||||||
|
* @param ocrEngineMode the OcrEngineMode to set
|
||||||
|
*/
|
||||||
|
void setOcrEngineMode(int ocrEngineMode);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets page segmentation mode.
|
||||||
|
*
|
||||||
|
* @param mode the page segmentation mode to set
|
||||||
|
*/
|
||||||
|
void setPageSegMode(int mode);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the value of Tesseract's internal parameter.
|
||||||
|
*
|
||||||
|
* @param key variable name, e.g., <code>tessedit_create_hocr</code>,
|
||||||
|
* <code>tessedit_char_whitelist</code>, etc.
|
||||||
|
* @param value value for corresponding variable, e.g., "1", "0",
|
||||||
|
* "0123456789", etc.
|
||||||
|
*/
|
||||||
|
void setTessVariable(String key, String value);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets configs to be passed to Tesseract's <code>Init</code> method.
|
||||||
|
*
|
||||||
|
* @param configs list of config filenames, e.g., "digits", "bazaar",
|
||||||
|
* "quiet"
|
||||||
|
*/
|
||||||
|
void setConfigs(List<String> configs);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates documents for given renderers.
|
||||||
|
*
|
||||||
|
* @param filename input image
|
||||||
|
* @param outputbase output filename without extension
|
||||||
|
* @param formats types of renderers
|
||||||
|
* @throws TesseractException
|
||||||
|
*/
|
||||||
|
void createDocuments(String filename, String outputbase, List<RenderedFormat> formats) throws TesseractException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates documents for given renderers.
|
||||||
|
*
|
||||||
|
* @param filenames array of input files
|
||||||
|
* @param outputbases array of output filenames without extension
|
||||||
|
* @param formats types of renderers
|
||||||
|
* @throws TesseractException
|
||||||
|
*/
|
||||||
|
void createDocuments(String[] filenames, String[] outputbases, List<RenderedFormat> formats) throws TesseractException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets segmented regions at specified page iterator level.
|
||||||
|
*
|
||||||
|
* @param bi input image
|
||||||
|
* @param pageIteratorLevel TessPageIteratorLevel enum
|
||||||
|
* @return list of <code>Rectangle</code>
|
||||||
|
* @throws TesseractException
|
||||||
|
*/
|
||||||
|
List<Rectangle> getSegmentedRegions(BufferedImage bi, int pageIteratorLevel) throws TesseractException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets recognized words at specified page iterator level.
|
||||||
|
*
|
||||||
|
* @param bi input image
|
||||||
|
* @param pageIteratorLevel TessPageIteratorLevel enum
|
||||||
|
* @return list of <code>Word</code>
|
||||||
|
*/
|
||||||
|
List<Word> getWords(BufferedImage bi, int pageIteratorLevel);
|
||||||
|
}
|
1225
NGCC/Tess4J/src/net/sourceforge/tess4j/TessAPI.java
Normal file
1225
NGCC/Tess4J/src/net/sourceforge/tess4j/TessAPI.java
Normal file
File diff suppressed because it is too large
Load Diff
1228
NGCC/Tess4J/src/net/sourceforge/tess4j/TessAPI1.java
Normal file
1228
NGCC/Tess4J/src/net/sourceforge/tess4j/TessAPI1.java
Normal file
File diff suppressed because it is too large
Load Diff
682
NGCC/Tess4J/src/net/sourceforge/tess4j/Tesseract.java
Normal file
682
NGCC/Tess4J/src/net/sourceforge/tess4j/Tesseract.java
Normal file
@ -0,0 +1,682 @@
|
|||||||
|
/**
|
||||||
|
* Copyright @ 2012 Quan Nguyen
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||||
|
* use this file except in compliance with the License. You may obtain a copy of
|
||||||
|
* the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations under
|
||||||
|
* the License.
|
||||||
|
*/
|
||||||
|
package net.sourceforge.tess4j;
|
||||||
|
|
||||||
|
import com.sun.jna.Pointer;
|
||||||
|
import com.sun.jna.StringArray;
|
||||||
|
import com.sun.jna.ptr.PointerByReference;
|
||||||
|
import java.awt.Rectangle;
|
||||||
|
import java.awt.image.*;
|
||||||
|
import java.io.*;
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
import java.nio.IntBuffer;
|
||||||
|
import java.util.*;
|
||||||
|
import javax.imageio.IIOImage;
|
||||||
|
import net.sourceforge.lept4j.Box;
|
||||||
|
import net.sourceforge.lept4j.Boxa;
|
||||||
|
import static net.sourceforge.lept4j.ILeptonica.L_CLONE;
|
||||||
|
import net.sourceforge.lept4j.Leptonica;
|
||||||
|
import static net.sourceforge.tess4j.ITessAPI.TRUE;
|
||||||
|
|
||||||
|
import net.sourceforge.tess4j.ITessAPI.TessBaseAPI;
|
||||||
|
import net.sourceforge.tess4j.ITessAPI.TessOcrEngineMode;
|
||||||
|
import net.sourceforge.tess4j.ITessAPI.TessPageIterator;
|
||||||
|
import net.sourceforge.tess4j.ITessAPI.TessResultIterator;
|
||||||
|
import net.sourceforge.tess4j.ITessAPI.TessResultRenderer;
|
||||||
|
|
||||||
|
import net.sourceforge.tess4j.util.ImageIOHelper;
|
||||||
|
import net.sourceforge.tess4j.util.LoggHelper;
|
||||||
|
import net.sourceforge.tess4j.util.PdfUtilities;
|
||||||
|
import org.slf4j.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An object layer on top of <code>TessAPI</code>, provides character
|
||||||
|
* recognition support for common image formats, and multi-page TIFF images
|
||||||
|
* beyond the uncompressed, binary TIFF format supported by Tesseract OCR
|
||||||
|
* engine. The extended capabilities are provided by the
|
||||||
|
* <code>Java Advanced Imaging Image I/O Tools</code>.<br>
|
||||||
|
* <br>
|
||||||
|
* Support for PDF documents is available through <code>Ghost4J</code>, a
|
||||||
|
* <code>JNA</code> wrapper for <code>GPL Ghostscript</code>, which should be
|
||||||
|
* installed and included in system path.<br>
|
||||||
|
* <br>
|
||||||
|
* Any program that uses the library will need to ensure that the required
|
||||||
|
* libraries (the <code>.jar</code> files for <code>jna</code>,
|
||||||
|
* <code>jai-imageio</code>, and <code>ghost4j</code>) are in its compile and
|
||||||
|
* run-time <code>classpath</code>.
|
||||||
|
*/
|
||||||
|
public class Tesseract implements ITesseract {
|
||||||
|
|
||||||
|
private static Tesseract instance;
|
||||||
|
private String language = "eng";
|
||||||
|
private String datapath;
|
||||||
|
private RenderedFormat renderedFormat = RenderedFormat.TEXT;
|
||||||
|
private int psm = -1;
|
||||||
|
private int ocrEngineMode = TessOcrEngineMode.OEM_DEFAULT;
|
||||||
|
private final Properties prop = new Properties();
|
||||||
|
private final List<String> configList = new ArrayList<String>();
|
||||||
|
|
||||||
|
private TessAPI api;
|
||||||
|
private TessBaseAPI handle;
|
||||||
|
|
||||||
|
private static final org.slf4j.Logger logger = LoggerFactory.getLogger(new LoggHelper().toString());
|
||||||
|
|
||||||
|
/**
 * Creates an instance whose tessdata path comes from the
 * <code>TESSDATA_PREFIX</code> environment variable, falling back to
 * <code>"./"</code> when the variable is unset or cannot be read.
 */
public Tesseract() {
    String prefix = null;
    try {
        prefix = System.getenv("TESSDATA_PREFIX");
    } catch (Exception e) {
        // ignore: the fallback path below is used instead
    }
    datapath = (prefix != null) ? prefix : "./";
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns TessAPI object.
|
||||||
|
*
|
||||||
|
* @return api
|
||||||
|
*/
|
||||||
|
protected TessAPI getAPI() {
|
||||||
|
return api;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns API handle.
|
||||||
|
*
|
||||||
|
* @return handle
|
||||||
|
*/
|
||||||
|
protected TessBaseAPI getHandle() {
|
||||||
|
return handle;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets an instance of the class library.
|
||||||
|
*
|
||||||
|
* @deprecated As of Release 2.0, use default constructor instead.
|
||||||
|
* @return instance
|
||||||
|
*/
|
||||||
|
@Deprecated
|
||||||
|
public static synchronized Tesseract getInstance() {
|
||||||
|
if (instance == null) {
|
||||||
|
instance = new Tesseract();
|
||||||
|
}
|
||||||
|
|
||||||
|
return instance;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets path to <code>tessdata</code>.
|
||||||
|
*
|
||||||
|
* @param datapath the tessdata path to set
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void setDatapath(String datapath) {
|
||||||
|
this.datapath = datapath;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets language for OCR.
|
||||||
|
*
|
||||||
|
* @param language the language code, which follows ISO 639-3 standard.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void setLanguage(String language) {
|
||||||
|
this.language = language;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets OCR engine mode.
|
||||||
|
*
|
||||||
|
* @param ocrEngineMode the OcrEngineMode to set
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void setOcrEngineMode(int ocrEngineMode) {
|
||||||
|
this.ocrEngineMode = ocrEngineMode;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets page segmentation mode.
|
||||||
|
*
|
||||||
|
* @param mode the page segmentation mode to set
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void setPageSegMode(int mode) {
|
||||||
|
this.psm = mode;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Enables hocr output.
|
||||||
|
*
|
||||||
|
* @param hocr to enable or disable hocr output
|
||||||
|
*/
|
||||||
|
public void setHocr(boolean hocr) {
|
||||||
|
this.renderedFormat = hocr ? RenderedFormat.HOCR : RenderedFormat.TEXT;
|
||||||
|
prop.setProperty("tessedit_create_hocr", hocr ? "1" : "0");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the value of Tesseract's internal parameter.
|
||||||
|
*
|
||||||
|
* @param key variable name, e.g., <code>tessedit_create_hocr</code>,
|
||||||
|
* <code>tessedit_char_whitelist</code>, etc.
|
||||||
|
* @param value value for corresponding variable, e.g., "1", "0",
|
||||||
|
* "0123456789", etc.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void setTessVariable(String key, String value) {
|
||||||
|
prop.setProperty(key, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets configs to be passed to Tesseract's <code>Init</code> method.
|
||||||
|
*
|
||||||
|
* @param configs list of config filenames, e.g., "digits", "bazaar",
|
||||||
|
* "quiet"
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void setConfigs(List<String> configs) {
|
||||||
|
configList.clear();
|
||||||
|
if (configs != null) {
|
||||||
|
configList.addAll(configs);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs OCR operation.
|
||||||
|
*
|
||||||
|
* @param imageFile an image file
|
||||||
|
* @return the recognized text
|
||||||
|
* @throws TesseractException
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public String doOCR(File imageFile) throws TesseractException {
|
||||||
|
return doOCR(imageFile, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs OCR operation.
|
||||||
|
*
|
||||||
|
* @param imageFile an image file
|
||||||
|
* @param rect the bounding rectangle defines the region of the image to be
|
||||||
|
* recognized. A rectangle of zero dimension or <code>null</code> indicates
|
||||||
|
* the whole image.
|
||||||
|
* @return the recognized text
|
||||||
|
* @throws TesseractException
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public String doOCR(File imageFile, Rectangle rect) throws TesseractException {
|
||||||
|
try {
|
||||||
|
return doOCR(ImageIOHelper.getIIOImageList(imageFile), imageFile.getPath(), rect);
|
||||||
|
} catch (Exception e) {
|
||||||
|
logger.error(e.getMessage(), e);
|
||||||
|
throw new TesseractException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs OCR operation.
|
||||||
|
*
|
||||||
|
* @param bi a buffered image
|
||||||
|
* @return the recognized text
|
||||||
|
* @throws TesseractException
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public String doOCR(BufferedImage bi) throws TesseractException {
|
||||||
|
return doOCR(bi, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs OCR operation.
|
||||||
|
*
|
||||||
|
* @param bi a buffered image
|
||||||
|
* @param rect the bounding rectangle defines the region of the image to be
|
||||||
|
* recognized. A rectangle of zero dimension or <code>null</code> indicates
|
||||||
|
* the whole image.
|
||||||
|
* @return the recognized text
|
||||||
|
* @throws TesseractException
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public String doOCR(BufferedImage bi, Rectangle rect) throws TesseractException {
|
||||||
|
try {
|
||||||
|
return doOCR(ImageIOHelper.getIIOImageList(bi), rect);
|
||||||
|
} catch (Exception e) {
|
||||||
|
logger.error(e.getMessage(), e);
|
||||||
|
throw new TesseractException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs OCR operation.
|
||||||
|
*
|
||||||
|
* @param imageList a list of <code>IIOImage</code> objects
|
||||||
|
* @param rect the bounding rectangle defines the region of the image to be
|
||||||
|
* recognized. A rectangle of zero dimension or <code>null</code> indicates
|
||||||
|
* the whole image.
|
||||||
|
* @return the recognized text
|
||||||
|
* @throws TesseractException
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public String doOCR(List<IIOImage> imageList, Rectangle rect) throws TesseractException {
|
||||||
|
return doOCR(imageList, null, rect);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs OCR operation.
|
||||||
|
*
|
||||||
|
* @param imageList a list of <code>IIOImage</code> objects
|
||||||
|
* @param filename input file name. Needed only for training and reading a
|
||||||
|
* UNLV zone file.
|
||||||
|
* @param rect the bounding rectangle defines the region of the image to be
|
||||||
|
* recognized. A rectangle of zero dimension or <code>null</code> indicates
|
||||||
|
* the whole image.
|
||||||
|
* @return the recognized text
|
||||||
|
* @throws TesseractException
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public String doOCR(List<IIOImage> imageList, String filename, Rectangle rect) throws TesseractException {
|
||||||
|
init();
|
||||||
|
setTessVariables();
|
||||||
|
|
||||||
|
try {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
int pageNum = 0;
|
||||||
|
|
||||||
|
for (IIOImage oimage : imageList) {
|
||||||
|
pageNum++;
|
||||||
|
try {
|
||||||
|
setImage(oimage.getRenderedImage(), rect);
|
||||||
|
sb.append(getOCRText(filename, pageNum));
|
||||||
|
} catch (IOException ioe) {
|
||||||
|
// skip the problematic image
|
||||||
|
logger.error(ioe.getMessage(), ioe);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (renderedFormat == RenderedFormat.HOCR) {
|
||||||
|
sb.insert(0, htmlBeginTag).append(htmlEndTag);
|
||||||
|
}
|
||||||
|
|
||||||
|
return sb.toString();
|
||||||
|
} finally {
|
||||||
|
dispose();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs OCR operation. Use <code>SetImage</code>, (optionally)
|
||||||
|
* <code>SetRectangle</code>, and one or more of the <code>Get*Text</code>
|
||||||
|
* functions.
|
||||||
|
*
|
||||||
|
* @param xsize width of image
|
||||||
|
* @param ysize height of image
|
||||||
|
* @param buf pixel data
|
||||||
|
* @param rect the bounding rectangle defines the region of the image to be
|
||||||
|
* recognized. A rectangle of zero dimension or <code>null</code> indicates
|
||||||
|
* the whole image.
|
||||||
|
* @param bpp bits per pixel, represents the bit depth of the image, with 1
|
||||||
|
* for binary bitmap, 8 for gray, and 24 for color RGB.
|
||||||
|
* @return the recognized text
|
||||||
|
* @throws TesseractException
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public String doOCR(int xsize, int ysize, ByteBuffer buf, Rectangle rect, int bpp) throws TesseractException {
|
||||||
|
return doOCR(xsize, ysize, buf, null, rect, bpp);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs OCR operation. Use <code>SetImage</code>, (optionally)
|
||||||
|
* <code>SetRectangle</code>, and one or more of the <code>Get*Text</code>
|
||||||
|
* functions.
|
||||||
|
*
|
||||||
|
* @param xsize width of image
|
||||||
|
* @param ysize height of image
|
||||||
|
* @param buf pixel data
|
||||||
|
* @param filename input file name. Needed only for training and reading a
|
||||||
|
* UNLV zone file.
|
||||||
|
* @param rect the bounding rectangle defines the region of the image to be
|
||||||
|
* recognized. A rectangle of zero dimension or <code>null</code> indicates
|
||||||
|
* the whole image.
|
||||||
|
* @param bpp bits per pixel, represents the bit depth of the image, with 1
|
||||||
|
* for binary bitmap, 8 for gray, and 24 for color RGB.
|
||||||
|
* @return the recognized text
|
||||||
|
* @throws TesseractException
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public String doOCR(int xsize, int ysize, ByteBuffer buf, String filename, Rectangle rect, int bpp) throws TesseractException {
|
||||||
|
init();
|
||||||
|
setTessVariables();
|
||||||
|
|
||||||
|
try {
|
||||||
|
setImage(xsize, ysize, buf, rect, bpp);
|
||||||
|
return getOCRText(filename, 1);
|
||||||
|
} catch (Exception e) {
|
||||||
|
logger.error(e.getMessage(), e);
|
||||||
|
throw new TesseractException(e);
|
||||||
|
} finally {
|
||||||
|
dispose();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initializes Tesseract engine.
|
||||||
|
*/
|
||||||
|
protected void init() {
|
||||||
|
api = TessAPI.INSTANCE;
|
||||||
|
handle = api.TessBaseAPICreate();
|
||||||
|
StringArray sarray = new StringArray(configList.toArray(new String[0]));
|
||||||
|
PointerByReference configs = new PointerByReference();
|
||||||
|
configs.setPointer(sarray);
|
||||||
|
api.TessBaseAPIInit1(handle, datapath, language, ocrEngineMode, configs, configList.size());
|
||||||
|
if (psm > -1) {
|
||||||
|
api.TessBaseAPISetPageSegMode(handle, psm);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets Tesseract's internal parameters.
|
||||||
|
*/
|
||||||
|
protected void setTessVariables() {
|
||||||
|
Enumeration<?> em = prop.propertyNames();
|
||||||
|
while (em.hasMoreElements()) {
|
||||||
|
String key = (String) em.nextElement();
|
||||||
|
api.TessBaseAPISetVariable(handle, key, prop.getProperty(key));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A wrapper for {@link #setImage(int, int, ByteBuffer, Rectangle, int)}.
|
||||||
|
*
|
||||||
|
* @param image a rendered image
|
||||||
|
* @param rect region of interest
|
||||||
|
* @throws java.io.IOException
|
||||||
|
*/
|
||||||
|
protected void setImage(RenderedImage image, Rectangle rect) throws IOException {
|
||||||
|
setImage(image.getWidth(), image.getHeight(), ImageIOHelper.getImageByteBuffer(image), rect, image
|
||||||
|
.getColorModel().getPixelSize());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets image to be processed.
|
||||||
|
*
|
||||||
|
* @param xsize width of image
|
||||||
|
* @param ysize height of image
|
||||||
|
* @param buf pixel data
|
||||||
|
* @param rect the bounding rectangle defines the region of the image to be
|
||||||
|
* recognized. A rectangle of zero dimension or <code>null</code> indicates
|
||||||
|
* the whole image.
|
||||||
|
* @param bpp bits per pixel, represents the bit depth of the image, with 1
|
||||||
|
* for binary bitmap, 8 for gray, and 24 for color RGB.
|
||||||
|
*/
|
||||||
|
protected void setImage(int xsize, int ysize, ByteBuffer buf, Rectangle rect, int bpp) {
|
||||||
|
int bytespp = bpp / 8;
|
||||||
|
int bytespl = (int) Math.ceil(xsize * bpp / 8.0);
|
||||||
|
api.TessBaseAPISetImage(handle, buf, xsize, ysize, bytespp, bytespl);
|
||||||
|
|
||||||
|
if (rect != null && !rect.isEmpty()) {
|
||||||
|
api.TessBaseAPISetRectangle(handle, rect.x, rect.y, rect.width, rect.height);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets recognized text.
|
||||||
|
*
|
||||||
|
* @param filename input file name. Needed only for reading a UNLV zone
|
||||||
|
* file.
|
||||||
|
* @param pageNum page number; needed for hocr paging.
|
||||||
|
* @return the recognized text
|
||||||
|
*/
|
||||||
|
protected String getOCRText(String filename, int pageNum) {
|
||||||
|
if (filename != null && !filename.isEmpty()) {
|
||||||
|
api.TessBaseAPISetInputName(handle, filename);
|
||||||
|
}
|
||||||
|
|
||||||
|
Pointer utf8Text = renderedFormat == RenderedFormat.HOCR ? api.TessBaseAPIGetHOCRText(handle, pageNum - 1) : api.TessBaseAPIGetUTF8Text(handle);
|
||||||
|
String str = utf8Text.getString(0);
|
||||||
|
api.TessDeleteText(utf8Text);
|
||||||
|
return str;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates renderers for given formats.
|
||||||
|
*
|
||||||
|
* @param outputbase
|
||||||
|
* @param formats
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
private TessResultRenderer createRenderers(String outputbase, List<RenderedFormat> formats) {
|
||||||
|
TessResultRenderer renderer = null;
|
||||||
|
|
||||||
|
for (RenderedFormat format : formats) {
|
||||||
|
switch (format) {
|
||||||
|
case TEXT:
|
||||||
|
if (renderer == null) {
|
||||||
|
renderer = api.TessTextRendererCreate(outputbase);
|
||||||
|
} else {
|
||||||
|
api.TessResultRendererInsert(renderer, api.TessTextRendererCreate(outputbase));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case HOCR:
|
||||||
|
if (renderer == null) {
|
||||||
|
renderer = api.TessHOcrRendererCreate(outputbase);
|
||||||
|
} else {
|
||||||
|
api.TessResultRendererInsert(renderer, api.TessHOcrRendererCreate(outputbase));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case PDF:
|
||||||
|
String dataPath = api.TessBaseAPIGetDatapath(handle);
|
||||||
|
if (renderer == null) {
|
||||||
|
renderer = api.TessPDFRendererCreate(outputbase, dataPath);
|
||||||
|
} else {
|
||||||
|
api.TessResultRendererInsert(renderer, api.TessPDFRendererCreate(outputbase, dataPath));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case BOX:
|
||||||
|
if (renderer == null) {
|
||||||
|
renderer = api.TessBoxTextRendererCreate(outputbase);
|
||||||
|
} else {
|
||||||
|
api.TessResultRendererInsert(renderer, api.TessBoxTextRendererCreate(outputbase));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case UNLV:
|
||||||
|
if (renderer == null) {
|
||||||
|
renderer = api.TessUnlvRendererCreate(outputbase);
|
||||||
|
} else {
|
||||||
|
api.TessResultRendererInsert(renderer, api.TessUnlvRendererCreate(outputbase));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return renderer;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates documents for given renderer.
|
||||||
|
*
|
||||||
|
* @param filename input image
|
||||||
|
* @param outputbase output filename without extension
|
||||||
|
* @param formats types of renderer
|
||||||
|
* @throws TesseractException
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void createDocuments(String filename, String outputbase, List<RenderedFormat> formats) throws TesseractException {
|
||||||
|
createDocuments(new String[]{filename}, new String[]{outputbase}, formats);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates documents.
|
||||||
|
*
|
||||||
|
* @param filenames array of input files
|
||||||
|
* @param outputbases array of output filenames without extension
|
||||||
|
* @param formats types of renderer
|
||||||
|
* @throws TesseractException
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void createDocuments(String[] filenames, String[] outputbases, List<RenderedFormat> formats) throws TesseractException {
|
||||||
|
if (filenames.length != outputbases.length) {
|
||||||
|
throw new RuntimeException("The two arrays must match in length.");
|
||||||
|
}
|
||||||
|
|
||||||
|
init();
|
||||||
|
setTessVariables();
|
||||||
|
|
||||||
|
try {
|
||||||
|
for (int i = 0; i < filenames.length; i++) {
|
||||||
|
File workingTiffFile = null;
|
||||||
|
try {
|
||||||
|
String filename = filenames[i];
|
||||||
|
|
||||||
|
// if PDF, convert to multi-page TIFF
|
||||||
|
if (filename.toLowerCase().endsWith(".pdf")) {
|
||||||
|
workingTiffFile = PdfUtilities.convertPdf2Tiff(new File(filename));
|
||||||
|
filename = workingTiffFile.getPath();
|
||||||
|
}
|
||||||
|
|
||||||
|
TessResultRenderer renderer = createRenderers(outputbases[i], formats);
|
||||||
|
createDocuments(filename, renderer);
|
||||||
|
api.TessDeleteResultRenderer(renderer);
|
||||||
|
} catch (Exception e) {
|
||||||
|
// skip the problematic image file
|
||||||
|
logger.error(e.getMessage(), e);
|
||||||
|
} finally {
|
||||||
|
if (workingTiffFile != null && workingTiffFile.exists()) {
|
||||||
|
workingTiffFile.delete();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
dispose();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates documents.
|
||||||
|
*
|
||||||
|
* @param filename input file
|
||||||
|
* @param renderer renderer
|
||||||
|
* @throws TesseractException
|
||||||
|
*/
|
||||||
|
private void createDocuments(String filename, TessResultRenderer renderer) throws TesseractException {
|
||||||
|
api.TessBaseAPISetInputName(handle, filename); //for reading a UNLV zone file
|
||||||
|
int result = api.TessBaseAPIProcessPages(handle, filename, null, 0, renderer);
|
||||||
|
|
||||||
|
if (result == ITessAPI.FALSE) {
|
||||||
|
throw new TesseractException("Error during processing page.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets segmented regions at specified page iterator level.
|
||||||
|
*
|
||||||
|
* @param bi input image
|
||||||
|
* @param pageIteratorLevel TessPageIteratorLevel enum
|
||||||
|
* @return list of <code>Rectangle</code>
|
||||||
|
* @throws TesseractException
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public List<Rectangle> getSegmentedRegions(BufferedImage bi, int pageIteratorLevel) throws TesseractException {
|
||||||
|
init();
|
||||||
|
setTessVariables();
|
||||||
|
|
||||||
|
try {
|
||||||
|
List<Rectangle> list = new ArrayList<Rectangle>();
|
||||||
|
setImage(bi, null);
|
||||||
|
|
||||||
|
Boxa boxes = api.TessBaseAPIGetComponentImages(handle, pageIteratorLevel, TRUE, null, null);
|
||||||
|
Leptonica leptInstance = Leptonica.INSTANCE;
|
||||||
|
int boxCount = leptInstance.boxaGetCount(boxes);
|
||||||
|
for (int i = 0; i < boxCount; i++) {
|
||||||
|
Box box = leptInstance.boxaGetBox(boxes, i, L_CLONE);
|
||||||
|
if (box == null) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
list.add(new Rectangle(box.x, box.y, box.w, box.h));
|
||||||
|
PointerByReference pRef = new PointerByReference();
|
||||||
|
pRef.setValue(box.getPointer());
|
||||||
|
leptInstance.boxDestroy(pRef);
|
||||||
|
}
|
||||||
|
|
||||||
|
PointerByReference pRef = new PointerByReference();
|
||||||
|
pRef.setValue(boxes.getPointer());
|
||||||
|
leptInstance.boxaDestroy(pRef);
|
||||||
|
|
||||||
|
return list;
|
||||||
|
} catch (IOException ioe) {
|
||||||
|
// skip the problematic image
|
||||||
|
logger.error(ioe.getMessage(), ioe);
|
||||||
|
throw new TesseractException(ioe);
|
||||||
|
} finally {
|
||||||
|
dispose();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets recognized words at specified page iterator level.
|
||||||
|
*
|
||||||
|
* @param bi input image
|
||||||
|
* @param pageIteratorLevel TessPageIteratorLevel enum
|
||||||
|
* @return list of <code>Word</code>
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public List<Word> getWords(BufferedImage bi, int pageIteratorLevel) {
|
||||||
|
this.init();
|
||||||
|
this.setTessVariables();
|
||||||
|
|
||||||
|
List<Word> words = new ArrayList<Word>();
|
||||||
|
|
||||||
|
try {
|
||||||
|
setImage(bi, null);
|
||||||
|
|
||||||
|
api.TessBaseAPIRecognize(handle, null);
|
||||||
|
TessResultIterator ri = api.TessBaseAPIGetIterator(handle);
|
||||||
|
TessPageIterator pi = api.TessResultIteratorGetPageIterator(ri);
|
||||||
|
api.TessPageIteratorBegin(pi);
|
||||||
|
|
||||||
|
do {
|
||||||
|
Pointer ptr = api.TessResultIteratorGetUTF8Text(ri, pageIteratorLevel);
|
||||||
|
String text = ptr.getString(0);
|
||||||
|
api.TessDeleteText(ptr);
|
||||||
|
float confidence = api.TessResultIteratorConfidence(ri, pageIteratorLevel);
|
||||||
|
IntBuffer leftB = IntBuffer.allocate(1);
|
||||||
|
IntBuffer topB = IntBuffer.allocate(1);
|
||||||
|
IntBuffer rightB = IntBuffer.allocate(1);
|
||||||
|
IntBuffer bottomB = IntBuffer.allocate(1);
|
||||||
|
api.TessPageIteratorBoundingBox(pi, pageIteratorLevel, leftB, topB, rightB, bottomB);
|
||||||
|
int left = leftB.get();
|
||||||
|
int top = topB.get();
|
||||||
|
int right = rightB.get();
|
||||||
|
int bottom = bottomB.get();
|
||||||
|
Word word = new Word(text, confidence, new Rectangle(left, top, right - left, bottom - top));
|
||||||
|
words.add(word);
|
||||||
|
} while (api.TessPageIteratorNext(pi, pageIteratorLevel) == TRUE);
|
||||||
|
|
||||||
|
return words;
|
||||||
|
} catch (Exception e) {
|
||||||
|
return words;
|
||||||
|
} finally {
|
||||||
|
dispose();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Releases all of the native resources used by this instance.
|
||||||
|
*/
|
||||||
|
protected void dispose() {
|
||||||
|
api.TessBaseAPIDelete(handle);
|
||||||
|
}
|
||||||
|
}
|
647
NGCC/Tess4J/src/net/sourceforge/tess4j/Tesseract1.java
Normal file
647
NGCC/Tess4J/src/net/sourceforge/tess4j/Tesseract1.java
Normal file
@ -0,0 +1,647 @@
|
|||||||
|
/**
|
||||||
|
* Copyright @ 2012 Quan Nguyen
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||||
|
* use this file except in compliance with the License. You may obtain a copy of
|
||||||
|
* the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations under
|
||||||
|
* the License.
|
||||||
|
*/
|
||||||
|
package net.sourceforge.tess4j;
|
||||||
|
|
||||||
|
import com.sun.jna.Pointer;
|
||||||
|
import com.sun.jna.StringArray;
|
||||||
|
import com.sun.jna.ptr.PointerByReference;
|
||||||
|
import java.awt.Rectangle;
|
||||||
|
import java.awt.image.*;
|
||||||
|
import java.io.*;
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
import java.nio.IntBuffer;
|
||||||
|
import java.util.*;
|
||||||
|
import javax.imageio.IIOImage;
|
||||||
|
import net.sourceforge.lept4j.Box;
|
||||||
|
import net.sourceforge.lept4j.Boxa;
|
||||||
|
import static net.sourceforge.lept4j.ILeptonica.L_CLONE;
|
||||||
|
import net.sourceforge.lept4j.Leptonica1;
|
||||||
|
import static net.sourceforge.tess4j.ITessAPI.TRUE;
|
||||||
|
|
||||||
|
import net.sourceforge.tess4j.util.ImageIOHelper;
|
||||||
|
import net.sourceforge.tess4j.util.LoggHelper;
|
||||||
|
import net.sourceforge.tess4j.util.PdfUtilities;
|
||||||
|
import org.slf4j.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An object layer on top of <code>TessAPI1</code>, provides character
|
||||||
|
* recognition support for common image formats, and multi-page TIFF images
|
||||||
|
* beyond the uncompressed, binary TIFF format supported by Tesseract OCR
|
||||||
|
* engine. The extended capabilities are provided by the
|
||||||
|
* <code>Java Advanced Imaging Image I/O Tools</code>.<br>
|
||||||
|
* <br>
|
||||||
|
* Support for PDF documents is available through <code>Ghost4J</code>, a
|
||||||
|
* <code>JNA</code> wrapper for <code>GPL Ghostscript</code>, which should be
|
||||||
|
* installed and included in system path.<br>
|
||||||
|
* <br>
|
||||||
|
* Any program that uses the library will need to ensure that the required
|
||||||
|
* libraries (the <code>.jar</code> files for <code>jna</code>,
|
||||||
|
* <code>jai-imageio</code>, and <code>ghost4j</code>) are in its compile and
|
||||||
|
* run-time <code>classpath</code>.
|
||||||
|
*/
|
||||||
|
public class Tesseract1 extends TessAPI1 implements ITesseract {
|
||||||
|
|
||||||
|
private String language = "eng";
|
||||||
|
private String datapath;
|
||||||
|
private RenderedFormat renderedFormat = RenderedFormat.TEXT;
|
||||||
|
private int psm = -1;
|
||||||
|
private int ocrEngineMode = TessOcrEngineMode.OEM_DEFAULT;
|
||||||
|
private final Properties prop = new Properties();
|
||||||
|
private final List<String> configList = new ArrayList<String>();
|
||||||
|
|
||||||
|
private TessBaseAPI handle;
|
||||||
|
|
||||||
|
private static final org.slf4j.Logger logger = LoggerFactory.getLogger(new LoggHelper().toString());
|
||||||
|
|
||||||
|
public Tesseract1() {
|
||||||
|
try {
|
||||||
|
datapath = System.getenv("TESSDATA_PREFIX");
|
||||||
|
} catch (Exception e) {
|
||||||
|
// ignore
|
||||||
|
} finally {
|
||||||
|
if (datapath == null) {
|
||||||
|
datapath = "./";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns API handle.
|
||||||
|
*
|
||||||
|
* @return handle
|
||||||
|
*/
|
||||||
|
protected TessBaseAPI getHandle() {
|
||||||
|
return handle;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets path to <code>tessdata</code>.
|
||||||
|
*
|
||||||
|
* @param datapath the tessdata path to set
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void setDatapath(String datapath) {
|
||||||
|
this.datapath = datapath;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets language for OCR.
|
||||||
|
*
|
||||||
|
* @param language the language code, which follows ISO 639-3 standard.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void setLanguage(String language) {
|
||||||
|
this.language = language;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets OCR engine mode.
|
||||||
|
*
|
||||||
|
* @param ocrEngineMode the OcrEngineMode to set
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void setOcrEngineMode(int ocrEngineMode) {
|
||||||
|
this.ocrEngineMode = ocrEngineMode;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets page segmentation mode.
|
||||||
|
*
|
||||||
|
* @param mode the page segmentation mode to set
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void setPageSegMode(int mode) {
|
||||||
|
this.psm = mode;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Enables hocr output.
|
||||||
|
*
|
||||||
|
* @param hocr to enable or disable hocr output
|
||||||
|
*/
|
||||||
|
public void setHocr(boolean hocr) {
|
||||||
|
this.renderedFormat = hocr ? RenderedFormat.HOCR : RenderedFormat.TEXT;
|
||||||
|
prop.setProperty("tessedit_create_hocr", hocr ? "1" : "0");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the value of Tesseract's internal parameter.
|
||||||
|
*
|
||||||
|
* @param key variable name, e.g., <code>tessedit_create_hocr</code>,
|
||||||
|
* <code>tessedit_char_whitelist</code>, etc.
|
||||||
|
* @param value value for corresponding variable, e.g., "1", "0",
|
||||||
|
* "0123456789", etc.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void setTessVariable(String key, String value) {
|
||||||
|
prop.setProperty(key, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets configs to be passed to Tesseract's <code>Init</code> method.
|
||||||
|
*
|
||||||
|
* @param configs list of config filenames, e.g., "digits", "bazaar",
|
||||||
|
* "quiet"
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void setConfigs(List<String> configs) {
|
||||||
|
configList.clear();
|
||||||
|
if (configs != null) {
|
||||||
|
configList.addAll(configs);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs OCR operation.
|
||||||
|
*
|
||||||
|
* @param imageFile an image file
|
||||||
|
* @return the recognized text
|
||||||
|
* @throws TesseractException
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public String doOCR(File imageFile) throws TesseractException {
|
||||||
|
return doOCR(imageFile, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs OCR operation.
|
||||||
|
*
|
||||||
|
* @param imageFile an image file
|
||||||
|
* @param rect the bounding rectangle defines the region of the image to be
|
||||||
|
* recognized. A rectangle of zero dimension or <code>null</code> indicates
|
||||||
|
* the whole image.
|
||||||
|
* @return the recognized text
|
||||||
|
* @throws TesseractException
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public String doOCR(File imageFile, Rectangle rect) throws TesseractException {
|
||||||
|
try {
|
||||||
|
return doOCR(ImageIOHelper.getIIOImageList(imageFile), imageFile.getPath(), rect);
|
||||||
|
} catch (Exception e) {
|
||||||
|
logger.error(e.getMessage(), e);
|
||||||
|
throw new TesseractException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs OCR operation.
|
||||||
|
*
|
||||||
|
* @param bi a buffered image
|
||||||
|
* @return the recognized text
|
||||||
|
* @throws TesseractException
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public String doOCR(BufferedImage bi) throws TesseractException {
|
||||||
|
return doOCR(bi, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs OCR operation.
|
||||||
|
*
|
||||||
|
* @param bi a buffered image
|
||||||
|
* @param rect the bounding rectangle defines the region of the image to be
|
||||||
|
* recognized. A rectangle of zero dimension or <code>null</code> indicates
|
||||||
|
* the whole image.
|
||||||
|
* @return the recognized text
|
||||||
|
* @throws TesseractException
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public String doOCR(BufferedImage bi, Rectangle rect) throws TesseractException {
|
||||||
|
try {
|
||||||
|
return doOCR(ImageIOHelper.getIIOImageList(bi), rect);
|
||||||
|
} catch (Exception e) {
|
||||||
|
logger.error(e.getMessage(), e);
|
||||||
|
throw new TesseractException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs OCR operation.
|
||||||
|
*
|
||||||
|
* @param imageList a list of <code>IIOImage</code> objects
|
||||||
|
* @param rect the bounding rectangle defines the region of the image to be
|
||||||
|
* recognized. A rectangle of zero dimension or <code>null</code> indicates
|
||||||
|
* the whole image.
|
||||||
|
* @return the recognized text
|
||||||
|
* @throws TesseractException
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public String doOCR(List<IIOImage> imageList, Rectangle rect) throws TesseractException {
|
||||||
|
return doOCR(imageList, null, rect);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs OCR operation.
|
||||||
|
*
|
||||||
|
* @param imageList a list of <code>IIOImage</code> objects
|
||||||
|
* @param filename input file name
|
||||||
|
* @param rect the bounding rectangle defines the region of the image to be
|
||||||
|
* recognized. A rectangle of zero dimension or <code>null</code> indicates
|
||||||
|
* the whole image.
|
||||||
|
* @return the recognized text
|
||||||
|
* @throws TesseractException
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public String doOCR(List<IIOImage> imageList, String filename, Rectangle rect) throws TesseractException {
|
||||||
|
init();
|
||||||
|
setTessVariables();
|
||||||
|
|
||||||
|
try {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
int pageNum = 0;
|
||||||
|
|
||||||
|
for (IIOImage oimage : imageList) {
|
||||||
|
pageNum++;
|
||||||
|
try {
|
||||||
|
setImage(oimage.getRenderedImage(), rect);
|
||||||
|
sb.append(getOCRText(filename, pageNum));
|
||||||
|
} catch (IOException ioe) {
|
||||||
|
// skip the problematic image
|
||||||
|
logger.error(ioe.getMessage(), ioe);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (renderedFormat == RenderedFormat.HOCR) {
|
||||||
|
sb.insert(0, htmlBeginTag).append(htmlEndTag);
|
||||||
|
}
|
||||||
|
|
||||||
|
return sb.toString();
|
||||||
|
} finally {
|
||||||
|
dispose();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs OCR operation. Use <code>SetImage</code>, (optionally)
|
||||||
|
* <code>SetRectangle</code>, and one or more of the <code>Get*Text</code>
|
||||||
|
* functions.
|
||||||
|
*
|
||||||
|
* @param xsize width of image
|
||||||
|
* @param ysize height of image
|
||||||
|
* @param buf pixel data
|
||||||
|
* @param rect the bounding rectangle defines the region of the image to be
|
||||||
|
* recognized. A rectangle of zero dimension or <code>null</code> indicates
|
||||||
|
* the whole image.
|
||||||
|
* @param bpp bits per pixel, represents the bit depth of the image, with 1
|
||||||
|
* for binary bitmap, 8 for gray, and 24 for color RGB.
|
||||||
|
* @return the recognized text
|
||||||
|
* @throws TesseractException
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public String doOCR(int xsize, int ysize, ByteBuffer buf, Rectangle rect, int bpp) throws TesseractException {
|
||||||
|
return doOCR(xsize, ysize, buf, null, rect, bpp);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs OCR operation. Use <code>SetImage</code>, (optionally)
|
||||||
|
* <code>SetRectangle</code>, and one or more of the <code>Get*Text</code>
|
||||||
|
* functions.
|
||||||
|
*
|
||||||
|
* @param xsize width of image
|
||||||
|
* @param ysize height of image
|
||||||
|
* @param buf pixel data
|
||||||
|
* @param filename input file name. Needed only for training and reading a
|
||||||
|
* UNLV zone file.
|
||||||
|
* @param rect the bounding rectangle defines the region of the image to be
|
||||||
|
* recognized. A rectangle of zero dimension or <code>null</code> indicates
|
||||||
|
* the whole image.
|
||||||
|
* @param bpp bits per pixel, represents the bit depth of the image, with 1
|
||||||
|
* for binary bitmap, 8 for gray, and 24 for color RGB.
|
||||||
|
* @return the recognized text
|
||||||
|
* @throws TesseractException
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public String doOCR(int xsize, int ysize, ByteBuffer buf, String filename, Rectangle rect, int bpp) throws TesseractException {
|
||||||
|
init();
|
||||||
|
setTessVariables();
|
||||||
|
|
||||||
|
try {
|
||||||
|
setImage(xsize, ysize, buf, rect, bpp);
|
||||||
|
return getOCRText(filename, 1);
|
||||||
|
} catch (Exception e) {
|
||||||
|
logger.error(e.getMessage(), e);
|
||||||
|
throw new TesseractException(e);
|
||||||
|
} finally {
|
||||||
|
dispose();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initializes Tesseract engine.
|
||||||
|
*/
|
||||||
|
protected void init() {
|
||||||
|
handle = TessBaseAPICreate();
|
||||||
|
StringArray sarray = new StringArray(configList.toArray(new String[0]));
|
||||||
|
PointerByReference configs = new PointerByReference();
|
||||||
|
configs.setPointer(sarray);
|
||||||
|
TessBaseAPIInit1(handle, datapath, language, ocrEngineMode, configs, configList.size());
|
||||||
|
if (psm > -1) {
|
||||||
|
TessBaseAPISetPageSegMode(handle, psm);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets Tesseract's internal parameters.
|
||||||
|
*/
|
||||||
|
protected void setTessVariables() {
|
||||||
|
Enumeration<?> em = prop.propertyNames();
|
||||||
|
while (em.hasMoreElements()) {
|
||||||
|
String key = (String) em.nextElement();
|
||||||
|
TessBaseAPISetVariable(handle, key, prop.getProperty(key));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A wrapper for {@link #setImage(int, int, ByteBuffer, Rectangle, int)}.
|
||||||
|
*
|
||||||
|
* @param image a rendered image
|
||||||
|
* @param rect region of interest
|
||||||
|
* @throws java.io.IOException
|
||||||
|
*/
|
||||||
|
protected void setImage(RenderedImage image, Rectangle rect) throws IOException {
|
||||||
|
setImage(image.getWidth(), image.getHeight(), ImageIOHelper.getImageByteBuffer(image), rect, image
|
||||||
|
.getColorModel().getPixelSize());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets image to be processed.
|
||||||
|
*
|
||||||
|
* @param xsize width of image
|
||||||
|
* @param ysize height of image
|
||||||
|
* @param buf pixel data
|
||||||
|
* @param rect the bounding rectangle defines the region of the image to be
|
||||||
|
* recognized. A rectangle of zero dimension or <code>null</code> indicates
|
||||||
|
* the whole image.
|
||||||
|
* @param bpp bits per pixel, represents the bit depth of the image, with 1
|
||||||
|
* for binary bitmap, 8 for gray, and 24 for color RGB.
|
||||||
|
*/
|
||||||
|
protected void setImage(int xsize, int ysize, ByteBuffer buf, Rectangle rect, int bpp) {
|
||||||
|
int bytespp = bpp / 8;
|
||||||
|
int bytespl = (int) Math.ceil(xsize * bpp / 8.0);
|
||||||
|
TessBaseAPISetImage(handle, buf, xsize, ysize, bytespp, bytespl);
|
||||||
|
|
||||||
|
if (rect != null && !rect.isEmpty()) {
|
||||||
|
TessBaseAPISetRectangle(handle, rect.x, rect.y, rect.width, rect.height);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets recognized text.
|
||||||
|
*
|
||||||
|
* @param filename input file name. Needed only for reading a UNLV zone
|
||||||
|
* file.
|
||||||
|
* @param pageNum page number; needed for hocr paging.
|
||||||
|
* @return the recognized text
|
||||||
|
*/
|
||||||
|
protected String getOCRText(String filename, int pageNum) {
|
||||||
|
if (filename != null && !filename.isEmpty()) {
|
||||||
|
TessBaseAPISetInputName(handle, filename);
|
||||||
|
}
|
||||||
|
|
||||||
|
Pointer utf8Text = renderedFormat == RenderedFormat.HOCR ? TessBaseAPIGetHOCRText(handle, pageNum - 1) : TessBaseAPIGetUTF8Text(handle);
|
||||||
|
String str = utf8Text.getString(0);
|
||||||
|
TessDeleteText(utf8Text);
|
||||||
|
return str;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates renderers for given formats.
|
||||||
|
*
|
||||||
|
* @param outputbase
|
||||||
|
* @param formats
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
private TessResultRenderer createRenderers(String outputbase, List<RenderedFormat> formats) {
|
||||||
|
TessResultRenderer renderer = null;
|
||||||
|
|
||||||
|
for (RenderedFormat format : formats) {
|
||||||
|
switch (format) {
|
||||||
|
case TEXT:
|
||||||
|
if (renderer == null) {
|
||||||
|
renderer = TessTextRendererCreate(outputbase);
|
||||||
|
} else {
|
||||||
|
TessResultRendererInsert(renderer, TessTextRendererCreate(outputbase));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case HOCR:
|
||||||
|
if (renderer == null) {
|
||||||
|
renderer = TessHOcrRendererCreate(outputbase);
|
||||||
|
} else {
|
||||||
|
TessResultRendererInsert(renderer, TessHOcrRendererCreate(outputbase));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case PDF:
|
||||||
|
String dataPath = TessBaseAPIGetDatapath(handle);
|
||||||
|
if (renderer == null) {
|
||||||
|
renderer = TessPDFRendererCreate(outputbase, dataPath);
|
||||||
|
} else {
|
||||||
|
TessResultRendererInsert(renderer, TessPDFRendererCreate(outputbase, dataPath));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case BOX:
|
||||||
|
if (renderer == null) {
|
||||||
|
renderer = TessBoxTextRendererCreate(outputbase);
|
||||||
|
} else {
|
||||||
|
TessResultRendererInsert(renderer, TessBoxTextRendererCreate(outputbase));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case UNLV:
|
||||||
|
if (renderer == null) {
|
||||||
|
renderer = TessUnlvRendererCreate(outputbase);
|
||||||
|
} else {
|
||||||
|
TessResultRendererInsert(renderer, TessUnlvRendererCreate(outputbase));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return renderer;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates documents for given renderer.
|
||||||
|
*
|
||||||
|
* @param filename input image
|
||||||
|
* @param outputbase output filename without extension
|
||||||
|
* @param formats types of renderer
|
||||||
|
* @throws TesseractException
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void createDocuments(String filename, String outputbase, List<RenderedFormat> formats) throws TesseractException {
|
||||||
|
createDocuments(new String[]{filename}, new String[]{outputbase}, formats);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates documents.
|
||||||
|
*
|
||||||
|
* @param filenames array of input files
|
||||||
|
* @param outputbases array of output filenames without extension
|
||||||
|
* @param formats types of renderer
|
||||||
|
* @throws TesseractException
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void createDocuments(String[] filenames, String[] outputbases, List<RenderedFormat> formats) throws TesseractException {
|
||||||
|
if (filenames.length != outputbases.length) {
|
||||||
|
throw new RuntimeException("The two arrays must match in length.");
|
||||||
|
}
|
||||||
|
|
||||||
|
init();
|
||||||
|
setTessVariables();
|
||||||
|
|
||||||
|
try {
|
||||||
|
for (int i = 0; i < filenames.length; i++) {
|
||||||
|
File workingTiffFile = null;
|
||||||
|
try {
|
||||||
|
String filename = filenames[i];
|
||||||
|
|
||||||
|
// if PDF, convert to multi-page TIFF
|
||||||
|
if (filename.toLowerCase().endsWith(".pdf")) {
|
||||||
|
workingTiffFile = PdfUtilities.convertPdf2Tiff(new File(filename));
|
||||||
|
filename = workingTiffFile.getPath();
|
||||||
|
}
|
||||||
|
|
||||||
|
TessResultRenderer renderer = createRenderers(outputbases[i], formats);
|
||||||
|
createDocuments(filename, renderer);
|
||||||
|
TessDeleteResultRenderer(renderer);
|
||||||
|
} catch (Exception e) {
|
||||||
|
// skip the problematic image file
|
||||||
|
logger.error(e.getMessage(), e);
|
||||||
|
} finally {
|
||||||
|
if (workingTiffFile != null && workingTiffFile.exists()) {
|
||||||
|
workingTiffFile.delete();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
dispose();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates documents.
|
||||||
|
*
|
||||||
|
* @param filename input file
|
||||||
|
* @param renderer renderer
|
||||||
|
* @throws TesseractException
|
||||||
|
*/
|
||||||
|
private void createDocuments(String filename, TessResultRenderer renderer) throws TesseractException {
|
||||||
|
TessBaseAPISetInputName(handle, filename); //for reading a UNLV zone file
|
||||||
|
int result = TessBaseAPIProcessPages(handle, filename, null, 0, renderer);
|
||||||
|
|
||||||
|
// if (result == ITessAPI.FALSE) {
|
||||||
|
// throw new TesseractException("Error during processing page.");
|
||||||
|
// }
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets segmented regions at specified page iterator level.
|
||||||
|
*
|
||||||
|
* @param bi input image
|
||||||
|
* @param pageIteratorLevel TessPageIteratorLevel enum
|
||||||
|
* @return list of <code>Rectangle</code>
|
||||||
|
* @throws TesseractException
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public List<Rectangle> getSegmentedRegions(BufferedImage bi, int pageIteratorLevel) throws TesseractException {
|
||||||
|
init();
|
||||||
|
setTessVariables();
|
||||||
|
|
||||||
|
try {
|
||||||
|
List<Rectangle> list = new ArrayList<Rectangle>();
|
||||||
|
setImage(bi, null);
|
||||||
|
|
||||||
|
Boxa boxes = TessBaseAPIGetComponentImages(handle, pageIteratorLevel, TRUE, null, null);
|
||||||
|
int boxCount = Leptonica1.boxaGetCount(boxes);
|
||||||
|
for (int i = 0; i < boxCount; i++) {
|
||||||
|
Box box = Leptonica1.boxaGetBox(boxes, i, L_CLONE);
|
||||||
|
if (box == null) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
list.add(new Rectangle(box.x, box.y, box.w, box.h));
|
||||||
|
PointerByReference pRef = new PointerByReference();
|
||||||
|
pRef.setValue(box.getPointer());
|
||||||
|
Leptonica1.boxDestroy(pRef);
|
||||||
|
}
|
||||||
|
|
||||||
|
PointerByReference pRef = new PointerByReference();
|
||||||
|
pRef.setValue(boxes.getPointer());
|
||||||
|
Leptonica1.boxaDestroy(pRef);
|
||||||
|
|
||||||
|
return list;
|
||||||
|
} catch (IOException ioe) {
|
||||||
|
// skip the problematic image
|
||||||
|
logger.error(ioe.getMessage(), ioe);
|
||||||
|
throw new TesseractException(ioe);
|
||||||
|
} finally {
|
||||||
|
dispose();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets recognized words at specified page iterator level.
|
||||||
|
*
|
||||||
|
* @param bi input image
|
||||||
|
* @param pageIteratorLevel TessPageIteratorLevel enum
|
||||||
|
* @return list of <code>Word</code>
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public List<Word> getWords(BufferedImage bi, int pageIteratorLevel) {
|
||||||
|
this.init();
|
||||||
|
this.setTessVariables();
|
||||||
|
|
||||||
|
List<Word> words = new ArrayList<Word>();
|
||||||
|
|
||||||
|
try {
|
||||||
|
setImage(bi, null);
|
||||||
|
|
||||||
|
TessBaseAPIRecognize(handle, null);
|
||||||
|
TessResultIterator ri = TessBaseAPIGetIterator(handle);
|
||||||
|
TessPageIterator pi = TessResultIteratorGetPageIterator(ri);
|
||||||
|
TessPageIteratorBegin(pi);
|
||||||
|
|
||||||
|
do {
|
||||||
|
Pointer ptr = TessResultIteratorGetUTF8Text(ri, pageIteratorLevel);
|
||||||
|
String text = ptr.getString(0);
|
||||||
|
TessAPI1.TessDeleteText(ptr);
|
||||||
|
float confidence = TessResultIteratorConfidence(ri, pageIteratorLevel);
|
||||||
|
IntBuffer leftB = IntBuffer.allocate(1);
|
||||||
|
IntBuffer topB = IntBuffer.allocate(1);
|
||||||
|
IntBuffer rightB = IntBuffer.allocate(1);
|
||||||
|
IntBuffer bottomB = IntBuffer.allocate(1);
|
||||||
|
TessPageIteratorBoundingBox(pi, pageIteratorLevel, leftB, topB, rightB, bottomB);
|
||||||
|
int left = leftB.get();
|
||||||
|
int top = topB.get();
|
||||||
|
int right = rightB.get();
|
||||||
|
int bottom = bottomB.get();
|
||||||
|
Word word = new Word(text, confidence, new Rectangle(left, top, right - left, bottom - top));
|
||||||
|
words.add(word);
|
||||||
|
} while (TessPageIteratorNext(pi, pageIteratorLevel) == TRUE);
|
||||||
|
|
||||||
|
return words;
|
||||||
|
} catch (Exception e) {
|
||||||
|
return words;
|
||||||
|
} finally {
|
||||||
|
dispose();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Releases all of the native resources used by this instance.
|
||||||
|
*/
|
||||||
|
protected void dispose() {
|
||||||
|
TessBaseAPIDelete(handle);
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,35 @@
|
|||||||
|
/**
|
||||||
|
* Copyright @ 2010 Quan Nguyen
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||||
|
* use this file except in compliance with the License. You may obtain a copy of
|
||||||
|
* the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations under
|
||||||
|
* the License.
|
||||||
|
*/
|
||||||
|
package net.sourceforge.tess4j;
|
||||||
|
|
||||||
|
public class TesseractException extends Exception {
|
||||||
|
|
||||||
|
public TesseractException() {
|
||||||
|
super();
|
||||||
|
}
|
||||||
|
|
||||||
|
public TesseractException(String message) {
|
||||||
|
super(message);
|
||||||
|
}
|
||||||
|
|
||||||
|
public TesseractException(Throwable cause) {
|
||||||
|
super(cause);
|
||||||
|
}
|
||||||
|
|
||||||
|
public TesseractException(String message, Throwable cause) {
|
||||||
|
super(message, cause);
|
||||||
|
}
|
||||||
|
}
|
67
NGCC/Tess4J/src/net/sourceforge/tess4j/Word.java
Normal file
67
NGCC/Tess4J/src/net/sourceforge/tess4j/Word.java
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
/**
|
||||||
|
* Copyright @ 2015 Quan Nguyen
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||||
|
* use this file except in compliance with the License. You may obtain a copy of
|
||||||
|
* the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations under
|
||||||
|
* the License.
|
||||||
|
*/
|
||||||
|
package net.sourceforge.tess4j;
|
||||||
|
|
||||||
|
import java.awt.Rectangle;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Encapsulates Tesseract OCR results.
|
||||||
|
*/
|
||||||
|
public class Word {
|
||||||
|
|
||||||
|
private final String text;
|
||||||
|
private final float confidence;
|
||||||
|
private final Rectangle rect;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructor.
|
||||||
|
*
|
||||||
|
* @param text
|
||||||
|
* @param confidence
|
||||||
|
* @param boundingBox
|
||||||
|
*/
|
||||||
|
public Word(String text, float confidence, Rectangle boundingBox) {
|
||||||
|
this.text = text;
|
||||||
|
this.confidence = confidence;
|
||||||
|
this.rect = boundingBox;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the text
|
||||||
|
*/
|
||||||
|
public String getText() {
|
||||||
|
return text;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the confidence
|
||||||
|
*/
|
||||||
|
public float getConfidence() {
|
||||||
|
return confidence;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the bounding box
|
||||||
|
*/
|
||||||
|
public Rectangle getBoundingBox() {
|
||||||
|
return rect;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return String.format("%s [Confidence: %f Bounding box: %d %d %d %d]", text, confidence, rect.x, rect.y, rect.width, rect.height);
|
||||||
|
}
|
||||||
|
}
|
216
NGCC/Tess4J/src/net/sourceforge/tess4j/util/ImageHelper.java
Normal file
216
NGCC/Tess4J/src/net/sourceforge/tess4j/util/ImageHelper.java
Normal file
@ -0,0 +1,216 @@
|
|||||||
|
/**
|
||||||
|
* Copyright @ 2008 Quan Nguyen
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||||
|
* use this file except in compliance with the License. You may obtain a copy of
|
||||||
|
* the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations under
|
||||||
|
* the License.
|
||||||
|
*/
|
||||||
|
package net.sourceforge.tess4j.util;
|
||||||
|
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.awt.Graphics2D;
|
||||||
|
import java.awt.Image;
|
||||||
|
import java.awt.RenderingHints;
|
||||||
|
import java.awt.Toolkit;
|
||||||
|
import java.awt.Transparency;
|
||||||
|
import java.awt.datatransfer.Clipboard;
|
||||||
|
import java.awt.datatransfer.DataFlavor;
|
||||||
|
import java.awt.image.*;
|
||||||
|
import javax.imageio.IIOImage;
|
||||||
|
|
||||||
|
public class ImageHelper {
|
||||||
|
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(new LoggHelper().toString());
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convenience method that returns a scaled instance of the provided
|
||||||
|
* {@code BufferedImage}.
|
||||||
|
*
|
||||||
|
* @param image the original image to be scaled
|
||||||
|
* @param targetWidth the desired width of the scaled instance, in pixels
|
||||||
|
* @param targetHeight the desired height of the scaled instance, in pixels
|
||||||
|
* @return a scaled version of the original {@code BufferedImage}
|
||||||
|
*/
|
||||||
|
public static BufferedImage getScaledInstance(BufferedImage image, int targetWidth, int targetHeight) {
|
||||||
|
int type = (image.getTransparency() == Transparency.OPAQUE)
|
||||||
|
? BufferedImage.TYPE_INT_RGB : BufferedImage.TYPE_INT_ARGB;
|
||||||
|
BufferedImage tmp = new BufferedImage(targetWidth, targetHeight, type);
|
||||||
|
Graphics2D g2 = tmp.createGraphics();
|
||||||
|
g2.setRenderingHint(RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BICUBIC);
|
||||||
|
g2.drawImage(image, 0, 0, targetWidth, targetHeight, null);
|
||||||
|
g2.dispose();
|
||||||
|
return tmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convenience method that returns a scaled instance of the provided
|
||||||
|
* {@code IIOImage}.
|
||||||
|
*
|
||||||
|
* @param iioSource the original image to be scaled
|
||||||
|
* @param scale the desired scale
|
||||||
|
* @return a scaled version of the original {@code IIOImage}
|
||||||
|
*/
|
||||||
|
public static IIOImage getScaledInstance(IIOImage iioSource, float scale) {
|
||||||
|
if (!(iioSource.getRenderedImage() instanceof BufferedImage)) {
|
||||||
|
throw new IllegalArgumentException("RenderedImage in IIOImage must be BufferedImage");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (Math.abs(scale - 1.0) < 0.001) {
|
||||||
|
return iioSource;
|
||||||
|
}
|
||||||
|
|
||||||
|
BufferedImage source = (BufferedImage) iioSource.getRenderedImage();
|
||||||
|
BufferedImage target = getScaledInstance(source, (int) (scale * source.getWidth()), (int) (scale * source.getHeight()));
|
||||||
|
return new IIOImage(target, null, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A replacement for the standard <code>BufferedImage.getSubimage</code>
|
||||||
|
* method.
|
||||||
|
*
|
||||||
|
* @param image
|
||||||
|
* @param x the X coordinate of the upper-left corner of the specified
|
||||||
|
* rectangular region
|
||||||
|
* @param y the Y coordinate of the upper-left corner of the specified
|
||||||
|
* rectangular region
|
||||||
|
* @param width the width of the specified rectangular region
|
||||||
|
* @param height the height of the specified rectangular region
|
||||||
|
* @return a BufferedImage that is the subimage of <code>image</code>.
|
||||||
|
*/
|
||||||
|
public static BufferedImage getSubImage(BufferedImage image, int x, int y, int width, int height) {
|
||||||
|
int type = (image.getTransparency() == Transparency.OPAQUE)
|
||||||
|
? BufferedImage.TYPE_INT_RGB : BufferedImage.TYPE_INT_ARGB;
|
||||||
|
BufferedImage tmp = new BufferedImage(width, height, type);
|
||||||
|
Graphics2D g2 = tmp.createGraphics();
|
||||||
|
g2.drawImage(image.getSubimage(x, y, width, height), 0, 0, null);
|
||||||
|
g2.dispose();
|
||||||
|
return tmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A simple method to convert an image to binary or B/W image.
|
||||||
|
*
|
||||||
|
* @param image input image
|
||||||
|
* @return a monochrome image
|
||||||
|
*/
|
||||||
|
public static BufferedImage convertImageToBinary(BufferedImage image) {
|
||||||
|
BufferedImage tmp = new BufferedImage(image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_BINARY);
|
||||||
|
Graphics2D g2 = tmp.createGraphics();
|
||||||
|
g2.drawImage(image, 0, 0, null);
|
||||||
|
g2.dispose();
|
||||||
|
return tmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A simple method to convert an image to binary or B/W image.
|
||||||
|
*
|
||||||
|
* @param image input image
|
||||||
|
* @return a monochrome image
|
||||||
|
* @deprecated As of release 1.1, renamed to
|
||||||
|
* {@link #convertImageToBinary(BufferedImage image)}
|
||||||
|
*/
|
||||||
|
@Deprecated
|
||||||
|
public static BufferedImage convertImage2Binary(BufferedImage image) {
|
||||||
|
return convertImageToBinary(image);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A simple method to convert an image to gray scale.
|
||||||
|
*
|
||||||
|
* @param image input image
|
||||||
|
* @return a monochrome image
|
||||||
|
*/
|
||||||
|
public static BufferedImage convertImageToGrayscale(BufferedImage image) {
|
||||||
|
BufferedImage tmp = new BufferedImage(image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_GRAY);
|
||||||
|
Graphics2D g2 = tmp.createGraphics();
|
||||||
|
g2.drawImage(image, 0, 0, null);
|
||||||
|
g2.dispose();
|
||||||
|
return tmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Lookup table with 256 entries; entry {@code v} holds {@code 255 - v}. */
private static final short[] invertTable;

static {
    short[] table = new short[256];
    int value = 0;
    while (value < 256) {
        table[value] = (short) (255 - value);
        value++;
    }
    invertTable = table;
}

/**
 * Inverts image color by running the image through a {@code LookupOp}
 * backed by {@code invertTable}. The result is written to a new image
 * with the same dimensions and type as the input.
 *
 * @param image input image
 * @return an inverted-color image
 */
public static BufferedImage invertImageColor(BufferedImage image) {
    BufferedImage inverted = new BufferedImage(image.getWidth(), image.getHeight(), image.getType());
    ShortLookupTable lookup = new ShortLookupTable(0, invertTable);
    BufferedImageOp invert = new LookupOp(lookup, null);
    return invert.filter(image, inverted);
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Rotates an image.
|
||||||
|
*
|
||||||
|
* @param image the original image
|
||||||
|
* @param angle the degree of rotation
|
||||||
|
* @return a rotated image
|
||||||
|
*/
|
||||||
|
public static BufferedImage rotateImage(BufferedImage image, double angle) {
|
||||||
|
double theta = Math.toRadians(angle);
|
||||||
|
double sin = Math.abs(Math.sin(theta));
|
||||||
|
double cos = Math.abs(Math.cos(theta));
|
||||||
|
int w = image.getWidth();
|
||||||
|
int h = image.getHeight();
|
||||||
|
int newW = (int) Math.floor(w * cos + h * sin);
|
||||||
|
int newH = (int) Math.floor(h * cos + w * sin);
|
||||||
|
|
||||||
|
BufferedImage tmp = new BufferedImage(newW, newH, image.getType());
|
||||||
|
Graphics2D g2d = tmp.createGraphics();
|
||||||
|
g2d.setRenderingHint(RenderingHints.KEY_INTERPOLATION,
|
||||||
|
RenderingHints.VALUE_INTERPOLATION_BICUBIC);
|
||||||
|
g2d.translate((newW - w) / 2, (newH - h) / 2);
|
||||||
|
g2d.rotate(theta, w / 2, h / 2);
|
||||||
|
g2d.drawImage(image, 0, 0, null);
|
||||||
|
g2d.dispose();
|
||||||
|
return tmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets an image from Clipboard.
|
||||||
|
*
|
||||||
|
* @return image
|
||||||
|
*/
|
||||||
|
public static Image getClipboardImage() {
|
||||||
|
Clipboard clipboard = Toolkit.getDefaultToolkit().getSystemClipboard();
|
||||||
|
try {
|
||||||
|
return (Image) clipboard.getData(DataFlavor.imageFlavor);
|
||||||
|
} catch (Exception e) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Clones an image.
|
||||||
|
* http://stackoverflow.com/questions/3514158/how-do-you-clone-a-bufferedimage
|
||||||
|
*
|
||||||
|
* @param bi
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public static BufferedImage cloneImage(BufferedImage bi) {
|
||||||
|
ColorModel cm = bi.getColorModel();
|
||||||
|
boolean isAlphaPremultiplied = cm.isAlphaPremultiplied();
|
||||||
|
WritableRaster raster = bi.copyData(null);
|
||||||
|
return new BufferedImage(cm, raster, isAlphaPremultiplied, null);
|
||||||
|
}
|
||||||
|
}
|
642
NGCC/Tess4J/src/net/sourceforge/tess4j/util/ImageIOHelper.java
Normal file
642
NGCC/Tess4J/src/net/sourceforge/tess4j/util/ImageIOHelper.java
Normal file
@ -0,0 +1,642 @@
|
|||||||
|
/**
|
||||||
|
* Copyright @ 2008 Quan Nguyen
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||||
|
* use this file except in compliance with the License. You may obtain a copy of
|
||||||
|
* the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations under
|
||||||
|
* the License.
|
||||||
|
*/
|
||||||
|
package net.sourceforge.tess4j.util;
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
|
import java.util.*;
|
||||||
|
import javax.imageio.*;
|
||||||
|
import javax.imageio.stream.*;
|
||||||
|
import javax.imageio.metadata.*;
|
||||||
|
import java.awt.Toolkit;
|
||||||
|
import java.awt.image.*;
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
import java.nio.ByteOrder;
|
||||||
|
|
||||||
|
import org.w3c.dom.NodeList;
|
||||||
|
|
||||||
|
import com.github.jaiimageio.plugins.tiff.*;
|
||||||
|
import com.recognition.software.jdeskew.ImageDeskew;
|
||||||
|
import com.recognition.software.jdeskew.ImageUtil;
|
||||||
|
import org.apache.commons.io.FilenameUtils;
|
||||||
|
|
||||||
|
public class ImageIOHelper {
|
||||||
|
|
||||||
|
/** Name prefix passed to {@code File.createTempFile} for the temporary TIFF files. */
final static String OUTPUT_FILE_NAME = "Tesstmp";
|
||||||
|
/** Name suffix (file extension) passed to {@code File.createTempFile} for the temporary TIFF files. */
final static String TIFF_EXT = ".tif";
|
||||||
|
/** Format name used to look up a TIFF {@code ImageWriter} through {@code ImageIO}. */
final static String TIFF_FORMAT = "tiff";
|
||||||
|
/** Message of the {@code RuntimeException} thrown when no TIFF writer is registered. */
final static String JAI_IMAGE_WRITER_MESSAGE = "Need to install JAI Image I/O package.\nhttps://java.net/projects/jai-imageio/";
|
||||||
|
/** Message of the {@code RuntimeException} thrown when no reader is registered for the input format. */
final static String JAI_IMAGE_READER_MESSAGE = "Unsupported image format. May need to install JAI Image I/O package.\nhttps://java.net/projects/jai-imageio/";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a list of TIFF image files from an image file. It basically
|
||||||
|
* converts images of other formats to TIFF format, or a multi-page TIFF
|
||||||
|
* image to multiple TIFF image files.
|
||||||
|
*
|
||||||
|
* @param imageFile input image file
|
||||||
|
* @param index an index of the page; -1 means all pages, as in a multi-page
|
||||||
|
* TIFF image
|
||||||
|
* @return a list of TIFF image files
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public static List<File> createTiffFiles(File imageFile, int index) throws IOException {
|
||||||
|
return createTiffFiles(imageFile, index, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a list of TIFF image files from an image file. It basically
|
||||||
|
* converts images of other formats to TIFF format, or a multi-page TIFF
|
||||||
|
* image to multiple TIFF image files.
|
||||||
|
*
|
||||||
|
* @param imageFile input image file
|
||||||
|
* @param index an index of the page; -1 means all pages, as in a multi-page
|
||||||
|
* TIFF image
|
||||||
|
* @param preserve preserve compression mode
|
||||||
|
* @return a list of TIFF image files
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public static List<File> createTiffFiles(File imageFile, int index, boolean preserve) throws IOException {
|
||||||
|
List<File> tiffFiles = new ArrayList<File>();
|
||||||
|
|
||||||
|
String imageFileName = imageFile.getName();
|
||||||
|
String imageFormat = imageFileName.substring(imageFileName.lastIndexOf('.') + 1);
|
||||||
|
|
||||||
|
Iterator<ImageReader> readers = ImageIO.getImageReadersByFormatName(imageFormat);
|
||||||
|
|
||||||
|
if (!readers.hasNext()) {
|
||||||
|
throw new RuntimeException(JAI_IMAGE_READER_MESSAGE);
|
||||||
|
}
|
||||||
|
|
||||||
|
ImageReader reader = readers.next();
|
||||||
|
|
||||||
|
ImageInputStream iis = ImageIO.createImageInputStream(imageFile);
|
||||||
|
reader.setInput(iis);
|
||||||
|
//Read the stream metadata
|
||||||
|
// IIOMetadata streamMetadata = reader.getStreamMetadata();
|
||||||
|
|
||||||
|
//Set up the writeParam
|
||||||
|
TIFFImageWriteParam tiffWriteParam = new TIFFImageWriteParam(Locale.US);
|
||||||
|
|
||||||
|
if (!preserve) {
|
||||||
|
tiffWriteParam.setCompressionMode(ImageWriteParam.MODE_DISABLED); // not preserve original sizes; decompress
|
||||||
|
}
|
||||||
|
|
||||||
|
//Get tif writer and set output to file
|
||||||
|
Iterator<ImageWriter> writers = ImageIO.getImageWritersByFormatName(TIFF_FORMAT);
|
||||||
|
|
||||||
|
if (!writers.hasNext()) {
|
||||||
|
throw new RuntimeException(JAI_IMAGE_WRITER_MESSAGE);
|
||||||
|
}
|
||||||
|
|
||||||
|
ImageWriter writer = writers.next();
|
||||||
|
|
||||||
|
//Read the stream metadata
|
||||||
|
IIOMetadata streamMetadata = writer.getDefaultStreamMetadata(tiffWriteParam);
|
||||||
|
|
||||||
|
int imageTotal = reader.getNumImages(true);
|
||||||
|
|
||||||
|
for (int i = 0; i < imageTotal; i++) {
|
||||||
|
// all if index == -1; otherwise, only index-th
|
||||||
|
if (index == -1 || i == index) {
|
||||||
|
// BufferedImage bi = reader.read(i);
|
||||||
|
// IIOImage oimage = new IIOImage(bi, null, reader.getImageMetadata(i));
|
||||||
|
IIOImage oimage = reader.readAll(i, reader.getDefaultReadParam());
|
||||||
|
File tiffFile = File.createTempFile(OUTPUT_FILE_NAME, TIFF_EXT);
|
||||||
|
ImageOutputStream ios = ImageIO.createImageOutputStream(tiffFile);
|
||||||
|
writer.setOutput(ios);
|
||||||
|
writer.write(streamMetadata, oimage, tiffWriteParam);
|
||||||
|
ios.close();
|
||||||
|
tiffFiles.add(tiffFile);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
writer.dispose();
|
||||||
|
reader.dispose();
|
||||||
|
|
||||||
|
return tiffFiles;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a list of TIFF image files from a list of <code>IIOImage</code>
|
||||||
|
* objects.
|
||||||
|
*
|
||||||
|
* @param imageList a list of <code>IIOImage</code> objects
|
||||||
|
* @param index an index of the page; -1 means all pages
|
||||||
|
* @return a list of TIFF image files
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public static List<File> createTiffFiles(List<IIOImage> imageList, int index) throws IOException {
|
||||||
|
return createTiffFiles(imageList, index, 0, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static List<File> createTiffFiles(List<IIOImage> imageList, int index, int dpiX, int dpiY) throws IOException {
|
||||||
|
List<File> tiffFiles = new ArrayList<File>();
|
||||||
|
|
||||||
|
//Set up the writeParam
|
||||||
|
TIFFImageWriteParam tiffWriteParam = new TIFFImageWriteParam(Locale.US);
|
||||||
|
tiffWriteParam.setCompressionMode(ImageWriteParam.MODE_DISABLED);
|
||||||
|
|
||||||
|
//Get tif writer and set output to file
|
||||||
|
Iterator<ImageWriter> writers = ImageIO.getImageWritersByFormatName(TIFF_FORMAT);
|
||||||
|
|
||||||
|
if (!writers.hasNext()) {
|
||||||
|
throw new RuntimeException(JAI_IMAGE_WRITER_MESSAGE);
|
||||||
|
}
|
||||||
|
|
||||||
|
ImageWriter writer = writers.next();
|
||||||
|
|
||||||
|
//Get the stream metadata
|
||||||
|
IIOMetadata streamMetadata = writer.getDefaultStreamMetadata(tiffWriteParam);
|
||||||
|
|
||||||
|
// all if index == -1; otherwise, only index-th
|
||||||
|
for (IIOImage oimage : (index == -1 ? imageList : imageList.subList(index, index + 1))) {
|
||||||
|
if (dpiX != 0 && dpiY != 0) {
|
||||||
|
// Get the default image metadata.
|
||||||
|
ImageTypeSpecifier imageType = ImageTypeSpecifier.createFromRenderedImage(oimage.getRenderedImage());
|
||||||
|
IIOMetadata imageMetadata = writer.getDefaultImageMetadata(imageType, null);
|
||||||
|
imageMetadata = setDPIViaAPI(imageMetadata, dpiX, dpiY);
|
||||||
|
oimage.setMetadata(imageMetadata);
|
||||||
|
}
|
||||||
|
|
||||||
|
File tiffFile = File.createTempFile(OUTPUT_FILE_NAME, TIFF_EXT);
|
||||||
|
ImageOutputStream ios = ImageIO.createImageOutputStream(tiffFile);
|
||||||
|
writer.setOutput(ios);
|
||||||
|
writer.write(streamMetadata, oimage, tiffWriteParam);
|
||||||
|
ios.close();
|
||||||
|
tiffFiles.add(tiffFile);
|
||||||
|
}
|
||||||
|
writer.dispose();
|
||||||
|
|
||||||
|
return tiffFiles;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set DPI using API.
|
||||||
|
*
|
||||||
|
* @param imageMetadata original IIOMetadata
|
||||||
|
* @param dpiX horizontal resolution
|
||||||
|
* @param dpiY vertical resolution
|
||||||
|
* @return modified IIOMetadata
|
||||||
|
* @throws IIOInvalidTreeException
|
||||||
|
*/
|
||||||
|
private static IIOMetadata setDPIViaAPI(IIOMetadata imageMetadata, int dpiX, int dpiY)
|
||||||
|
throws IIOInvalidTreeException {
|
||||||
|
// Derive the TIFFDirectory from the metadata.
|
||||||
|
TIFFDirectory dir = TIFFDirectory.createFromMetadata(imageMetadata);
|
||||||
|
|
||||||
|
// Get {X,Y}Resolution tags.
|
||||||
|
BaselineTIFFTagSet base = BaselineTIFFTagSet.getInstance();
|
||||||
|
TIFFTag tagXRes = base.getTag(BaselineTIFFTagSet.TAG_X_RESOLUTION);
|
||||||
|
TIFFTag tagYRes = base.getTag(BaselineTIFFTagSet.TAG_Y_RESOLUTION);
|
||||||
|
|
||||||
|
// Create {X,Y}Resolution fields.
|
||||||
|
TIFFField fieldXRes = new TIFFField(tagXRes, TIFFTag.TIFF_RATIONAL,
|
||||||
|
1, new long[][]{{dpiX, 1}});
|
||||||
|
TIFFField fieldYRes = new TIFFField(tagYRes, TIFFTag.TIFF_RATIONAL,
|
||||||
|
1, new long[][]{{dpiY, 1}});
|
||||||
|
|
||||||
|
// Append {X,Y}Resolution fields to directory.
|
||||||
|
dir.addTIFFField(fieldXRes);
|
||||||
|
dir.addTIFFField(fieldYRes);
|
||||||
|
|
||||||
|
// Convert to metadata object.
|
||||||
|
IIOMetadata metadata = dir.getAsMetadata();
|
||||||
|
|
||||||
|
// Add other metadata.
|
||||||
|
IIOMetadataNode root = new IIOMetadataNode("javax_imageio_1.0");
|
||||||
|
IIOMetadataNode horiz = new IIOMetadataNode("HorizontalPixelSize");
|
||||||
|
horiz.setAttribute("value", Double.toString(25.4f / dpiX));
|
||||||
|
IIOMetadataNode vert = new IIOMetadataNode("VerticalPixelSize");
|
||||||
|
vert.setAttribute("value", Double.toString(25.4f / dpiY));
|
||||||
|
IIOMetadataNode dim = new IIOMetadataNode("Dimension");
|
||||||
|
dim.appendChild(horiz);
|
||||||
|
dim.appendChild(vert);
|
||||||
|
root.appendChild(dim);
|
||||||
|
metadata.mergeTree("javax_imageio_1.0", root);
|
||||||
|
|
||||||
|
return metadata;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets pixel data of an <code>IIOImage</code> object.
|
||||||
|
*
|
||||||
|
* @param image an <code>IIOImage</code> object
|
||||||
|
* @return a byte buffer of pixel data
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public static ByteBuffer getImageByteBuffer(IIOImage image) throws IOException {
|
||||||
|
return getImageByteBuffer(image.getRenderedImage());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets pixel data of an <code>RenderedImage</code> object.
|
||||||
|
*
|
||||||
|
* @param image an <code>RenderedImage</code> object
|
||||||
|
* @return a byte buffer of pixel data
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public static ByteBuffer getImageByteBuffer(RenderedImage image) throws IOException {
|
||||||
|
//Set up the writeParam
|
||||||
|
TIFFImageWriteParam tiffWriteParam = new TIFFImageWriteParam(Locale.US);
|
||||||
|
tiffWriteParam.setCompressionMode(ImageWriteParam.MODE_DISABLED);
|
||||||
|
|
||||||
|
//Get tif writer and set output to file
|
||||||
|
Iterator<ImageWriter> writers = ImageIO.getImageWritersByFormatName(TIFF_FORMAT);
|
||||||
|
|
||||||
|
if (!writers.hasNext()) {
|
||||||
|
throw new RuntimeException(JAI_IMAGE_WRITER_MESSAGE);
|
||||||
|
}
|
||||||
|
|
||||||
|
ImageWriter writer = writers.next();
|
||||||
|
|
||||||
|
//Get the stream metadata
|
||||||
|
IIOMetadata streamMetadata = writer.getDefaultStreamMetadata(tiffWriteParam);
|
||||||
|
|
||||||
|
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
|
||||||
|
ImageOutputStream ios = ImageIO.createImageOutputStream(outputStream);
|
||||||
|
writer.setOutput(ios);
|
||||||
|
writer.write(streamMetadata, new IIOImage(image, null, null), tiffWriteParam);
|
||||||
|
// writer.write(image);
|
||||||
|
writer.dispose();
|
||||||
|
// ImageIO.write(image, "tiff", ios); // this can be used in lieu of writer
|
||||||
|
ios.seek(0);
|
||||||
|
BufferedImage bi = ImageIO.read(ios);
|
||||||
|
return convertImageData(bi);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Converts <code>BufferedImage</code> to <code>ByteBuffer</code>.
|
||||||
|
*
|
||||||
|
* @param bi Input image
|
||||||
|
* @return pixel data
|
||||||
|
*/
|
||||||
|
public static ByteBuffer convertImageData(BufferedImage bi) {
|
||||||
|
DataBuffer buff = bi.getRaster().getDataBuffer();
|
||||||
|
// ClassCastException thrown if buff not instanceof DataBufferByte because raster data is not necessarily bytes.
|
||||||
|
// Convert the original buffered image to grayscale.
|
||||||
|
if (!(buff instanceof DataBufferByte)) {
|
||||||
|
bi = ImageHelper.convertImageToGrayscale(bi);
|
||||||
|
buff = bi.getRaster().getDataBuffer();
|
||||||
|
}
|
||||||
|
byte[] pixelData = ((DataBufferByte) buff).getData();
|
||||||
|
// return ByteBuffer.wrap(pixelData);
|
||||||
|
ByteBuffer buf = ByteBuffer.allocateDirect(pixelData.length);
|
||||||
|
buf.order(ByteOrder.nativeOrder());
|
||||||
|
buf.put(pixelData);
|
||||||
|
buf.flip();
|
||||||
|
return buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets a list of <code>BufferedImage</code> objects for an image file.
|
||||||
|
*
|
||||||
|
* @param imageFile input image file. It can be any of the supported
|
||||||
|
* formats, including TIFF, JPEG, GIF, PNG, BMP, JPEG
|
||||||
|
* @return a list of <code>BufferedImage</code> objects
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public static List<BufferedImage> getImageList(File imageFile) throws IOException {
|
||||||
|
ImageReader reader = null;
|
||||||
|
ImageInputStream iis = null;
|
||||||
|
|
||||||
|
try {
|
||||||
|
List<BufferedImage> biList = new ArrayList<BufferedImage>();
|
||||||
|
|
||||||
|
String imageFileName = imageFile.getName();
|
||||||
|
String imageFormat = imageFileName.substring(imageFileName.lastIndexOf('.') + 1);
|
||||||
|
Iterator<ImageReader> readers = ImageIO.getImageReadersByFormatName(imageFormat);
|
||||||
|
if (!readers.hasNext()) {
|
||||||
|
throw new RuntimeException(JAI_IMAGE_READER_MESSAGE);
|
||||||
|
}
|
||||||
|
|
||||||
|
reader = readers.next();
|
||||||
|
|
||||||
|
iis = ImageIO.createImageInputStream(imageFile);
|
||||||
|
reader.setInput(iis);
|
||||||
|
|
||||||
|
int imageTotal = reader.getNumImages(true);
|
||||||
|
|
||||||
|
for (int i = 0; i < imageTotal; i++) {
|
||||||
|
BufferedImage bi = reader.read(i);
|
||||||
|
biList.add(bi);
|
||||||
|
}
|
||||||
|
|
||||||
|
return biList;
|
||||||
|
} finally {
|
||||||
|
try {
|
||||||
|
if (iis != null) {
|
||||||
|
iis.close();
|
||||||
|
}
|
||||||
|
if (reader != null) {
|
||||||
|
reader.dispose();
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
// ignore
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets a list of <code>IIOImage</code> objects for an image file.
|
||||||
|
*
|
||||||
|
* @param imageFile input image file. It can be any of the supported
|
||||||
|
* formats, including TIFF, JPEG, GIF, PNG, BMP, JPEG, and PDF if GPL
|
||||||
|
* Ghostscript is installed
|
||||||
|
* @return a list of <code>IIOImage</code> objects
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public static List<IIOImage> getIIOImageList(File imageFile) throws IOException {
|
||||||
|
File workingTiffFile = null;
|
||||||
|
|
||||||
|
ImageReader reader = null;
|
||||||
|
ImageInputStream iis = null;
|
||||||
|
|
||||||
|
try {
|
||||||
|
// convert PDF to TIFF
|
||||||
|
if (imageFile.getName().toLowerCase().endsWith(".pdf")) {
|
||||||
|
workingTiffFile = PdfUtilities.convertPdf2Tiff(imageFile);
|
||||||
|
imageFile = workingTiffFile;
|
||||||
|
}
|
||||||
|
|
||||||
|
List<IIOImage> iioImageList = new ArrayList<IIOImage>();
|
||||||
|
|
||||||
|
String imageFileName = imageFile.getName();
|
||||||
|
String imageFormat = imageFileName.substring(imageFileName.lastIndexOf('.') + 1);
|
||||||
|
if (imageFormat.matches("(pbm|pgm|ppm)")) {
|
||||||
|
imageFormat = "pnm";
|
||||||
|
} else if (imageFormat.matches("(jp2|j2k|jpf|jpx|jpm)")) {
|
||||||
|
imageFormat = "jpeg2000";
|
||||||
|
}
|
||||||
|
Iterator<ImageReader> readers = ImageIO.getImageReadersByFormatName(imageFormat);
|
||||||
|
|
||||||
|
if (!readers.hasNext()) {
|
||||||
|
throw new RuntimeException(JAI_IMAGE_READER_MESSAGE);
|
||||||
|
}
|
||||||
|
|
||||||
|
reader = readers.next();
|
||||||
|
iis = ImageIO.createImageInputStream(imageFile);
|
||||||
|
reader.setInput(iis);
|
||||||
|
|
||||||
|
int imageTotal = reader.getNumImages(true);
|
||||||
|
|
||||||
|
for (int i = 0; i < imageTotal; i++) {
|
||||||
|
// IIOImage oimage = new IIOImage(reader.read(i), null, reader.getImageMetadata(i));
|
||||||
|
IIOImage oimage = reader.readAll(i, reader.getDefaultReadParam());
|
||||||
|
iioImageList.add(oimage);
|
||||||
|
}
|
||||||
|
|
||||||
|
return iioImageList;
|
||||||
|
} finally {
|
||||||
|
try {
|
||||||
|
if (iis != null) {
|
||||||
|
iis.close();
|
||||||
|
}
|
||||||
|
if (reader != null) {
|
||||||
|
reader.dispose();
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
// ignore
|
||||||
|
}
|
||||||
|
if (workingTiffFile != null && workingTiffFile.exists()) {
|
||||||
|
workingTiffFile.delete();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets a list of <code>IIOImage</code> objects for a
|
||||||
|
* <code>BufferedImage</code>.
|
||||||
|
*
|
||||||
|
* @param bi input image
|
||||||
|
* @return a list of <code>IIOImage</code> objects
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public static List<IIOImage> getIIOImageList(BufferedImage bi) throws IOException {
|
||||||
|
List<IIOImage> iioImageList = new ArrayList<IIOImage>();
|
||||||
|
IIOImage oimage = new IIOImage(bi, null, null);
|
||||||
|
iioImageList.add(oimage);
|
||||||
|
return iioImageList;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Merges multiple images into one multi-page TIFF image.
|
||||||
|
*
|
||||||
|
* @param inputImages an array of image files
|
||||||
|
* @param outputTiff the output multi-page TIFF file
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public static void mergeTiff(File[] inputImages, File outputTiff) throws IOException {
|
||||||
|
if (inputImages.length == 0) {
|
||||||
|
// if no image
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
Iterator<ImageWriter> writers = ImageIO.getImageWritersByFormatName(TIFF_FORMAT);
|
||||||
|
|
||||||
|
if (!writers.hasNext()) {
|
||||||
|
throw new RuntimeException(JAI_IMAGE_WRITER_MESSAGE);
|
||||||
|
}
|
||||||
|
|
||||||
|
ImageWriter writer = writers.next();
|
||||||
|
|
||||||
|
//Set up the writeParam
|
||||||
|
TIFFImageWriteParam tiffWriteParam = new TIFFImageWriteParam(Locale.US);
|
||||||
|
// tiffWriteParam.setCompressionMode(ImageWriteParam.MODE_DISABLED); // commented out to preserve original sizes
|
||||||
|
|
||||||
|
//Get the stream metadata
|
||||||
|
IIOMetadata streamMetadata = writer.getDefaultStreamMetadata(tiffWriteParam);
|
||||||
|
|
||||||
|
ImageOutputStream ios = ImageIO.createImageOutputStream(outputTiff);
|
||||||
|
writer.setOutput(ios);
|
||||||
|
|
||||||
|
boolean firstPage = true;
|
||||||
|
int index = 1;
|
||||||
|
for (File inputImage : inputImages) {
|
||||||
|
List<IIOImage> iioImages = getIIOImageList(inputImage);
|
||||||
|
for (IIOImage iioImage : iioImages) {
|
||||||
|
if (firstPage) {
|
||||||
|
writer.write(streamMetadata, iioImage, tiffWriteParam);
|
||||||
|
firstPage = false;
|
||||||
|
} else {
|
||||||
|
writer.writeInsert(index++, iioImage, tiffWriteParam);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ios.close();
|
||||||
|
|
||||||
|
writer.dispose();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Merges multiple images into one multi-page TIFF image.
|
||||||
|
*
|
||||||
|
* @param inputImages an array of <code>BufferedImage</code>
|
||||||
|
* @param outputTiff the output TIFF file
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public static void mergeTiff(BufferedImage[] inputImages, File outputTiff) throws IOException {
|
||||||
|
mergeTiff(inputImages, outputTiff, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Merges multiple images into one multi-page TIFF image.
|
||||||
|
*
|
||||||
|
* @param inputImages an array of <code>BufferedImage</code>
|
||||||
|
* @param outputTiff the output TIFF file
|
||||||
|
* @param compressionType valid values: LZW, CCITT T.6, PackBits
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public static void mergeTiff(BufferedImage[] inputImages, File outputTiff, String compressionType) throws IOException {
|
||||||
|
List<IIOImage> imageList = new ArrayList<IIOImage>();
|
||||||
|
|
||||||
|
for (BufferedImage inputImage : inputImages) {
|
||||||
|
imageList.add(new IIOImage(inputImage, null, null));
|
||||||
|
}
|
||||||
|
|
||||||
|
mergeTiff(imageList, outputTiff, compressionType);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Merges multiple images into one multi-page TIFF image.
|
||||||
|
*
|
||||||
|
* @param imageList a list of <code>IIOImage</code> objects
|
||||||
|
* @param outputTiff the output TIFF file
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public static void mergeTiff(List<IIOImage> imageList, File outputTiff) throws IOException {
|
||||||
|
mergeTiff(imageList, outputTiff, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Merges multiple images into one multi-page TIFF image.
|
||||||
|
*
|
||||||
|
* @param imageList a list of <code>IIOImage</code> objects
|
||||||
|
* @param outputTiff the output TIFF file
|
||||||
|
* @param compressionType valid values: LZW, CCITT T.6, PackBits
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public static void mergeTiff(List<IIOImage> imageList, File outputTiff, String compressionType) throws IOException {
|
||||||
|
if (imageList == null || imageList.isEmpty()) {
|
||||||
|
// if no image
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
Iterator<ImageWriter> writers = ImageIO.getImageWritersByFormatName(TIFF_FORMAT);
|
||||||
|
if (!writers.hasNext()) {
|
||||||
|
throw new RuntimeException(JAI_IMAGE_WRITER_MESSAGE);
|
||||||
|
}
|
||||||
|
|
||||||
|
ImageWriter writer = writers.next();
|
||||||
|
|
||||||
|
//Set up the writeParam
|
||||||
|
TIFFImageWriteParam tiffWriteParam = new TIFFImageWriteParam(Locale.US);
|
||||||
|
// tiffWriteParam.setCompressionMode(ImageWriteParam.MODE_DISABLED); // comment out to preserve original sizes
|
||||||
|
if (compressionType != null) {
|
||||||
|
tiffWriteParam.setCompressionMode(ImageWriteParam.MODE_EXPLICIT);
|
||||||
|
tiffWriteParam.setCompressionType(compressionType);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Get the stream metadata
|
||||||
|
IIOMetadata streamMetadata = writer.getDefaultStreamMetadata(tiffWriteParam);
|
||||||
|
|
||||||
|
ImageOutputStream ios = ImageIO.createImageOutputStream(outputTiff);
|
||||||
|
writer.setOutput(ios);
|
||||||
|
|
||||||
|
int dpiX = 300;
|
||||||
|
int dpiY = 300;
|
||||||
|
|
||||||
|
for (IIOImage iioImage : imageList) {
|
||||||
|
// Get the default image metadata.
|
||||||
|
ImageTypeSpecifier imageType = ImageTypeSpecifier.createFromRenderedImage(iioImage.getRenderedImage());
|
||||||
|
IIOMetadata imageMetadata = writer.getDefaultImageMetadata(imageType, null);
|
||||||
|
imageMetadata = setDPIViaAPI(imageMetadata, dpiX, dpiY);
|
||||||
|
iioImage.setMetadata(imageMetadata);
|
||||||
|
}
|
||||||
|
|
||||||
|
IIOImage firstIioImage = imageList.remove(0);
|
||||||
|
writer.write(streamMetadata, firstIioImage, tiffWriteParam);
|
||||||
|
|
||||||
|
int i = 1;
|
||||||
|
for (IIOImage iioImage : imageList) {
|
||||||
|
writer.writeInsert(i++, iioImage, tiffWriteParam);
|
||||||
|
}
|
||||||
|
ios.close();
|
||||||
|
|
||||||
|
writer.dispose();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Deskews image.
|
||||||
|
*
|
||||||
|
* @param imageFile input image
|
||||||
|
* @param minimumDeskewThreshold minimum deskew threshold (typically, 0.05d)
|
||||||
|
* @return temporary multi-page TIFF image file
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public static File deskewImage(File imageFile, double minimumDeskewThreshold) throws IOException {
|
||||||
|
List<BufferedImage> imageList = getImageList(imageFile);
|
||||||
|
for (int i = 0; i < imageList.size(); i++) {
|
||||||
|
BufferedImage bi = imageList.get(i);
|
||||||
|
ImageDeskew deskew = new ImageDeskew(bi);
|
||||||
|
double imageSkewAngle = deskew.getSkewAngle();
|
||||||
|
|
||||||
|
if ((imageSkewAngle > minimumDeskewThreshold || imageSkewAngle < -(minimumDeskewThreshold))) {
|
||||||
|
bi = ImageUtil.rotate(bi, -imageSkewAngle, bi.getWidth() / 2, bi.getHeight() / 2);
|
||||||
|
imageList.set(i, bi); // replace original with deskewed image
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
File tempImageFile = File.createTempFile(FilenameUtils.getBaseName(imageFile.getName()), ".tif");
|
||||||
|
ImageIOHelper.mergeTiff(imageList.toArray(new BufferedImage[0]), tempImageFile);
|
||||||
|
|
||||||
|
return tempImageFile;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads image meta data.
|
||||||
|
*
|
||||||
|
* @param oimage
|
||||||
|
* @return a map of meta data
|
||||||
|
*/
|
||||||
|
public static Map<String, String> readImageData(IIOImage oimage) {
|
||||||
|
Map<String, String> dict = new HashMap<String, String>();
|
||||||
|
|
||||||
|
IIOMetadata imageMetadata = oimage.getMetadata();
|
||||||
|
if (imageMetadata != null) {
|
||||||
|
IIOMetadataNode dimNode = (IIOMetadataNode) imageMetadata.getAsTree("javax_imageio_1.0");
|
||||||
|
NodeList nodes = dimNode.getElementsByTagName("HorizontalPixelSize");
|
||||||
|
int dpiX;
|
||||||
|
if (nodes.getLength() > 0) {
|
||||||
|
float dpcWidth = Float.parseFloat(nodes.item(0).getAttributes().item(0).getNodeValue());
|
||||||
|
dpiX = (int) Math.round(25.4f / dpcWidth);
|
||||||
|
} else {
|
||||||
|
dpiX = Toolkit.getDefaultToolkit().getScreenResolution();
|
||||||
|
}
|
||||||
|
dict.put("dpiX", String.valueOf(dpiX));
|
||||||
|
|
||||||
|
nodes = dimNode.getElementsByTagName("VerticalPixelSize");
|
||||||
|
int dpiY;
|
||||||
|
if (nodes.getLength() > 0) {
|
||||||
|
float dpcHeight = Float.parseFloat(nodes.item(0).getAttributes().item(0).getNodeValue());
|
||||||
|
dpiY = (int) Math.round(25.4f / dpcHeight);
|
||||||
|
} else {
|
||||||
|
dpiY = Toolkit.getDefaultToolkit().getScreenResolution();
|
||||||
|
}
|
||||||
|
dict.put("dpiY", String.valueOf(dpiY));
|
||||||
|
}
|
||||||
|
|
||||||
|
return dict;
|
||||||
|
}
|
||||||
|
}
|
232
NGCC/Tess4J/src/net/sourceforge/tess4j/util/LoadLibs.java
Normal file
232
NGCC/Tess4J/src/net/sourceforge/tess4j/util/LoadLibs.java
Normal file
@ -0,0 +1,232 @@
|
|||||||
|
/**
|
||||||
|
* Copyright @ 2014 Quan Nguyen
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||||
|
* use this file except in compliance with the License. You may obtain a copy of
|
||||||
|
* the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations under
|
||||||
|
* the License.
|
||||||
|
*/
|
||||||
|
package net.sourceforge.tess4j.util;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.io.OutputStream;
|
||||||
|
import java.net.JarURLConnection;
|
||||||
|
import java.net.URISyntaxException;
|
||||||
|
import java.net.URL;
|
||||||
|
import java.net.URLConnection;
|
||||||
|
import java.util.Enumeration;
|
||||||
|
import java.util.jar.JarEntry;
|
||||||
|
import java.util.jar.JarFile;
|
||||||
|
|
||||||
|
import org.apache.commons.io.FileUtils;
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.jboss.vfs.VFS;
|
||||||
|
import org.jboss.vfs.VirtualFile;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.sun.jna.Native;
|
||||||
|
import com.sun.jna.Platform;
|
||||||
|
|
||||||
|
import net.sourceforge.tess4j.TessAPI;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Loads native libraries from JAR or project folder.
|
||||||
|
*
|
||||||
|
* @author O.J. Sousa Rodrigues
|
||||||
|
* @author Quan Nguyen
|
||||||
|
*/
|
||||||
|
public class LoadLibs {
|
||||||
|
|
||||||
|
private static final String VFS_PROTOCOL = "vfs";
|
||||||
|
private static final String JNA_LIBRARY_PATH = "jna.library.path";
|
||||||
|
public static final String TESS4J_TEMP_DIR = new File(System.getProperty("java.io.tmpdir"), "tess4j").getPath();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Native library name.
|
||||||
|
*/
|
||||||
|
public static final String LIB_NAME = "libtesseract3051";
|
||||||
|
public static final String LIB_NAME_NON_WIN = "tesseract";
|
||||||
|
|
||||||
|
private static final org.slf4j.Logger logger = LoggerFactory.getLogger(new LoggHelper().toString());
|
||||||
|
|
||||||
|
static {
|
||||||
|
System.setProperty("jna.encoding", "UTF8");
|
||||||
|
File targetTempFolder = extractTessResources(Platform.RESOURCE_PREFIX);
|
||||||
|
if (targetTempFolder != null && targetTempFolder.exists()) {
|
||||||
|
String userCustomizedPath = System.getProperty(JNA_LIBRARY_PATH);
|
||||||
|
if (null == userCustomizedPath || userCustomizedPath.isEmpty()) {
|
||||||
|
System.setProperty(JNA_LIBRARY_PATH, targetTempFolder.getPath());
|
||||||
|
} else {
|
||||||
|
System.setProperty(JNA_LIBRARY_PATH, userCustomizedPath + File.pathSeparator + targetTempFolder.getPath());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Loads Tesseract library via JNA.
|
||||||
|
*
|
||||||
|
* @return TessAPI instance being loaded using
|
||||||
|
* <code>Native.loadLibrary()</code>.
|
||||||
|
*/
|
||||||
|
public static TessAPI getTessAPIInstance() {
|
||||||
|
return (TessAPI) Native.loadLibrary(getTesseractLibName(), TessAPI.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets native library name.
|
||||||
|
*
|
||||||
|
* @return the name of the tesseract library to be loaded using the
|
||||||
|
* <code>Native.register()</code>.
|
||||||
|
*/
|
||||||
|
public static String getTesseractLibName() {
|
||||||
|
return Platform.isWindows() ? LIB_NAME : LIB_NAME_NON_WIN;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extracts tesseract resources to temp folder.
|
||||||
|
*
|
||||||
|
* @param resourceName name of file or directory
|
||||||
|
* @return target path, which could be file or directory
|
||||||
|
*/
|
||||||
|
public static synchronized File extractTessResources(String resourceName) {
|
||||||
|
File targetPath = null;
|
||||||
|
|
||||||
|
try {
|
||||||
|
targetPath = new File(TESS4J_TEMP_DIR, resourceName);
|
||||||
|
|
||||||
|
Enumeration<URL> resources = LoadLibs.class.getClassLoader().getResources(resourceName);
|
||||||
|
while (resources.hasMoreElements()) {
|
||||||
|
URL resourceUrl = resources.nextElement();
|
||||||
|
copyResources(resourceUrl, targetPath);
|
||||||
|
}
|
||||||
|
} catch (IOException | URISyntaxException e) {
|
||||||
|
logger.warn(e.getMessage(), e);
|
||||||
|
}
|
||||||
|
|
||||||
|
return targetPath;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copies resources to target folder.
|
||||||
|
*
|
||||||
|
* @param resourceUrl
|
||||||
|
* @param targetPath
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
static void copyResources(URL resourceUrl, File targetPath) throws IOException, URISyntaxException {
|
||||||
|
if (resourceUrl == null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
URLConnection urlConnection = resourceUrl.openConnection();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copy resources either from inside jar or from project folder.
|
||||||
|
*/
|
||||||
|
if (urlConnection instanceof JarURLConnection) {
|
||||||
|
copyJarResourceToPath((JarURLConnection) urlConnection, targetPath);
|
||||||
|
} else if (VFS_PROTOCOL.equals(resourceUrl.getProtocol())) {
|
||||||
|
VirtualFile virtualFileOrFolder = VFS.getChild(resourceUrl.toURI());
|
||||||
|
copyFromWarToFolder(virtualFileOrFolder, targetPath);
|
||||||
|
} else {
|
||||||
|
File file = new File(resourceUrl.getPath());
|
||||||
|
if (file.isDirectory()) {
|
||||||
|
for (File resourceFile : FileUtils.listFiles(file, null, true)) {
|
||||||
|
int index = resourceFile.getPath().lastIndexOf(targetPath.getName()) + targetPath.getName().length();
|
||||||
|
File targetFile = new File(targetPath, resourceFile.getPath().substring(index));
|
||||||
|
if (!targetFile.exists() || targetFile.length() != resourceFile.length()) {
|
||||||
|
if (resourceFile.isFile()) {
|
||||||
|
FileUtils.copyFile(resourceFile, targetFile);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (!targetPath.exists() || targetPath.length() != file.length()) {
|
||||||
|
FileUtils.copyFile(file, targetPath);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copies resources from the jar file of the current thread and extract it
|
||||||
|
* to the destination path.
|
||||||
|
*
|
||||||
|
* @param jarConnection
|
||||||
|
* @param destPath destination file or directory
|
||||||
|
*/
|
||||||
|
static void copyJarResourceToPath(JarURLConnection jarConnection, File destPath) {
|
||||||
|
try (JarFile jarFile = jarConnection.getJarFile()) {
|
||||||
|
String jarConnectionEntryName = jarConnection.getEntryName();
|
||||||
|
if (!jarConnectionEntryName.endsWith("/")) {
|
||||||
|
jarConnectionEntryName += "/";
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Iterate all entries in the jar file.
|
||||||
|
*/
|
||||||
|
for (Enumeration<JarEntry> e = jarFile.entries(); e.hasMoreElements();) {
|
||||||
|
JarEntry jarEntry = e.nextElement();
|
||||||
|
String jarEntryName = jarEntry.getName();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extract files only if they match the path.
|
||||||
|
*/
|
||||||
|
if (jarEntryName.startsWith(jarConnectionEntryName)) {
|
||||||
|
String filename = jarEntryName.substring(jarConnectionEntryName.length());
|
||||||
|
File targetFile = new File(destPath, filename);
|
||||||
|
|
||||||
|
if (jarEntry.isDirectory()) {
|
||||||
|
targetFile.mkdirs();
|
||||||
|
} else {
|
||||||
|
if (!targetFile.exists() || targetFile.length() != jarEntry.getSize()) {
|
||||||
|
try (InputStream is = jarFile.getInputStream(jarEntry);
|
||||||
|
OutputStream out = FileUtils.openOutputStream(targetFile)) {
|
||||||
|
IOUtils.copy(is, out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
logger.warn(e.getMessage(), e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copies resources from WAR to target folder.
|
||||||
|
*
|
||||||
|
* @param virtualFileOrFolder
|
||||||
|
* @param targetFolder
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
static void copyFromWarToFolder(VirtualFile virtualFileOrFolder, File targetFolder) throws IOException {
|
||||||
|
if (virtualFileOrFolder.isDirectory() && !virtualFileOrFolder.getName().contains(".")) {
|
||||||
|
if (targetFolder.getName().equalsIgnoreCase(virtualFileOrFolder.getName())) {
|
||||||
|
for (VirtualFile innerFileOrFolder : virtualFileOrFolder.getChildren()) {
|
||||||
|
copyFromWarToFolder(innerFileOrFolder, targetFolder);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
File innerTargetFolder = new File(targetFolder, virtualFileOrFolder.getName());
|
||||||
|
innerTargetFolder.mkdir();
|
||||||
|
for (VirtualFile innerFileOrFolder : virtualFileOrFolder.getChildren()) {
|
||||||
|
copyFromWarToFolder(innerFileOrFolder, innerTargetFolder);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
File targetFile = new File(targetFolder, virtualFileOrFolder.getName());
|
||||||
|
if (!targetFile.exists() || targetFile.length() != virtualFileOrFolder.getSize()) {
|
||||||
|
FileUtils.copyURLToFile(virtualFileOrFolder.asFileURL(), targetFile);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
34
NGCC/Tess4J/src/net/sourceforge/tess4j/util/LoggHelper.java
Normal file
34
NGCC/Tess4J/src/net/sourceforge/tess4j/util/LoggHelper.java
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
/**
|
||||||
|
* Copyright @ 2015 Quan Nguyen
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||||
|
* use this file except in compliance with the License. You may obtain a copy of
|
||||||
|
* the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations under
|
||||||
|
* the License.
|
||||||
|
*/
|
||||||
|
package net.sourceforge.tess4j.util;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper for logging.
|
||||||
|
*
|
||||||
|
* @author O.J. Sousa Rodrigues
|
||||||
|
*/
|
||||||
|
public class LoggHelper extends Exception {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
LoggerConfig.INSTANCE.loadConfig();
|
||||||
|
|
||||||
|
StackTraceElement[] sTrace = this.getStackTrace();
|
||||||
|
String className = sTrace[0].getClassName();
|
||||||
|
|
||||||
|
return className;
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,51 @@
|
|||||||
|
/**
|
||||||
|
* Copyright @ 2015 Quan Nguyen
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||||
|
* use this file except in compliance with the License. You may obtain a copy of
|
||||||
|
* the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations under
|
||||||
|
* the License.
|
||||||
|
*/
|
||||||
|
package net.sourceforge.tess4j.util;
|
||||||
|
|
||||||
|
import org.slf4j.bridge.SLF4JBridgeHandler;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Logging configuration.
|
||||||
|
*
|
||||||
|
* @author O.J. Sousa Rodrigues
|
||||||
|
*/
|
||||||
|
public enum LoggerConfig {
|
||||||
|
|
||||||
|
INSTANCE;
|
||||||
|
|
||||||
|
private boolean isLoaded = false;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This method loads the Logger configuration.
|
||||||
|
*
|
||||||
|
* @return true if the Logger configuration was loaded successfully.
|
||||||
|
*/
|
||||||
|
public boolean loadConfig() {
|
||||||
|
|
||||||
|
try {
|
||||||
|
if (!isLoaded) {
|
||||||
|
SLF4JBridgeHandler.removeHandlersForRootLogger();
|
||||||
|
SLF4JBridgeHandler.install();
|
||||||
|
this.isLoaded = true;
|
||||||
|
// System.out.println("Logger configuration was loaded successfully.");
|
||||||
|
}
|
||||||
|
} catch (final Exception e) {
|
||||||
|
System.err.println("Logger configuration could not be loaded.");
|
||||||
|
}
|
||||||
|
|
||||||
|
return this.isLoaded;
|
||||||
|
}
|
||||||
|
}
|
215
NGCC/Tess4J/src/net/sourceforge/tess4j/util/PdfBoxUtilities.java
Normal file
215
NGCC/Tess4J/src/net/sourceforge/tess4j/util/PdfBoxUtilities.java
Normal file
@ -0,0 +1,215 @@
|
|||||||
|
/**
|
||||||
|
* Copyright @ 2018 Quan Nguyen
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||||
|
* use this file except in compliance with the License. You may obtain a copy of
|
||||||
|
* the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations under
|
||||||
|
* the License.
|
||||||
|
*/
|
||||||
|
package net.sourceforge.tess4j.util;
|
||||||
|
|
||||||
|
import java.awt.image.BufferedImage;
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FilenameFilter;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.pdfbox.io.MemoryUsageSetting;
|
||||||
|
import org.apache.pdfbox.multipdf.PDFMergerUtility;
|
||||||
|
import org.apache.pdfbox.multipdf.Splitter;
|
||||||
|
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||||
|
import org.apache.pdfbox.rendering.ImageType;
|
||||||
|
import org.apache.pdfbox.rendering.PDFRenderer;
|
||||||
|
import org.apache.pdfbox.tools.imageio.ImageIOUtil;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* PDF utilities based on PDFBox.
|
||||||
|
*
|
||||||
|
* @author Robert Drysdale
|
||||||
|
* @author Quan Nguyen
|
||||||
|
*/
|
||||||
|
public class PdfBoxUtilities {
|
||||||
|
|
||||||
|
private static final org.slf4j.Logger logger = LoggerFactory.getLogger(new LoggHelper().toString());
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Converts PDF to TIFF format.
|
||||||
|
*
|
||||||
|
* @param inputPdfFile input file
|
||||||
|
* @return a multi-page TIFF image
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public static File convertPdf2Tiff(File inputPdfFile) throws IOException {
|
||||||
|
File[] pngFiles = null;
|
||||||
|
|
||||||
|
try {
|
||||||
|
pngFiles = convertPdf2Png(inputPdfFile);
|
||||||
|
File tiffFile = File.createTempFile("multipage", ".tif");
|
||||||
|
|
||||||
|
// put PNG images into a single multi-page TIFF image for return
|
||||||
|
ImageIOHelper.mergeTiff(pngFiles, tiffFile);
|
||||||
|
return tiffFile;
|
||||||
|
} finally {
|
||||||
|
if (pngFiles != null && pngFiles.length > 0) {
|
||||||
|
// get the working directory of the PNG files
|
||||||
|
File pngDirectory = new File(pngFiles[0].getParent());
|
||||||
|
// delete temporary PNG images
|
||||||
|
for (File tempFile : pngFiles) {
|
||||||
|
tempFile.delete();
|
||||||
|
}
|
||||||
|
|
||||||
|
pngDirectory.delete();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Converts PDF to PNG format.
|
||||||
|
*
|
||||||
|
* @param inputPdfFile input file
|
||||||
|
* @return an array of PNG images
|
||||||
|
* @throws java.io.IOException
|
||||||
|
*/
|
||||||
|
public static File[] convertPdf2Png(File inputPdfFile) throws IOException {
|
||||||
|
Path path = Files.createTempDirectory("tessimages");
|
||||||
|
File imageDir = path.toFile();
|
||||||
|
|
||||||
|
PDDocument document = null;
|
||||||
|
try {
|
||||||
|
document = PDDocument.load(inputPdfFile);
|
||||||
|
PDFRenderer pdfRenderer = new PDFRenderer(document);
|
||||||
|
for (int page = 0; page < document.getNumberOfPages(); ++page) {
|
||||||
|
BufferedImage bim = pdfRenderer.renderImageWithDPI(page, 300, ImageType.RGB);
|
||||||
|
|
||||||
|
// suffix in filename will be used as the file format
|
||||||
|
String filename = String.format("workingimage%04d.png", page + 1);
|
||||||
|
ImageIOUtil.writeImage(bim, new File(imageDir, filename).getAbsolutePath(), 300);
|
||||||
|
}
|
||||||
|
} catch (IOException ioe) {
|
||||||
|
logger.error("Error extracting PDF Document => " + ioe);
|
||||||
|
} finally {
|
||||||
|
if (imageDir.list().length == 0) {
|
||||||
|
imageDir.delete();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (document != null) {
|
||||||
|
try {
|
||||||
|
document.close();
|
||||||
|
} catch (Exception e) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// find working files
|
||||||
|
File[] workingFiles = imageDir.listFiles(new FilenameFilter() {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean accept(File dir, String name) {
|
||||||
|
return name.toLowerCase().matches("workingimage\\d{4}\\.png$");
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
Arrays.sort(workingFiles, new Comparator<File>() {
|
||||||
|
@Override
|
||||||
|
public int compare(File f1, File f2) {
|
||||||
|
return f1.getName().compareTo(f2.getName());
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
return workingFiles;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Splits PDF.
|
||||||
|
*
|
||||||
|
* @param inputPdfFile input file
|
||||||
|
* @param outputPdfFile output file
|
||||||
|
* @param firstPage begin page
|
||||||
|
* @param lastPage end page
|
||||||
|
*/
|
||||||
|
public static void splitPdf(File inputPdfFile, File outputPdfFile, int firstPage, int lastPage) {
|
||||||
|
PDDocument document = null;
|
||||||
|
try {
|
||||||
|
document = PDDocument.load(inputPdfFile);
|
||||||
|
Splitter splitter = new Splitter();
|
||||||
|
|
||||||
|
splitter.setStartPage(firstPage);
|
||||||
|
splitter.setEndPage(lastPage);
|
||||||
|
splitter.setSplitAtPage(lastPage - firstPage + 1);
|
||||||
|
|
||||||
|
List<PDDocument> documents = splitter.split(document);
|
||||||
|
|
||||||
|
if (documents.size() == 1) {
|
||||||
|
PDDocument outputPdf = documents.get(0);
|
||||||
|
outputPdf.save(outputPdfFile);
|
||||||
|
outputPdf.close();
|
||||||
|
} else {
|
||||||
|
logger.error("Splitter returned " + documents.size() + " documents rather than expected of 1");
|
||||||
|
}
|
||||||
|
} catch (IOException ioe) {
|
||||||
|
logger.error("Exception splitting PDF => " + ioe);
|
||||||
|
} finally {
|
||||||
|
if (document != null) {
|
||||||
|
try {
|
||||||
|
document.close();
|
||||||
|
} catch (Exception e) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets PDF Page Count.
|
||||||
|
*
|
||||||
|
* @param inputPdfFile input file
|
||||||
|
* @return number of pages
|
||||||
|
*/
|
||||||
|
public static int getPdfPageCount(File inputPdfFile) {
|
||||||
|
PDDocument document = null;
|
||||||
|
try {
|
||||||
|
document = PDDocument.load(inputPdfFile);
|
||||||
|
return document.getNumberOfPages();
|
||||||
|
} catch (IOException ioe) {
|
||||||
|
logger.error("Error counting PDF pages => " + ioe);
|
||||||
|
return - 1;
|
||||||
|
} finally {
|
||||||
|
if (document != null) {
|
||||||
|
try {
|
||||||
|
document.close();
|
||||||
|
} catch (Exception e) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Merges PDF files.
|
||||||
|
*
|
||||||
|
* @param inputPdfFiles array of input files
|
||||||
|
* @param outputPdfFile output file
|
||||||
|
*/
|
||||||
|
public static void mergePdf(File[] inputPdfFiles, File outputPdfFile) {
|
||||||
|
try {
|
||||||
|
PDFMergerUtility mergerUtility = new PDFMergerUtility();
|
||||||
|
mergerUtility.setDestinationFileName(outputPdfFile.getPath());
|
||||||
|
for (File inputPdfFile : inputPdfFiles) {
|
||||||
|
mergerUtility.addSource(inputPdfFile);
|
||||||
|
}
|
||||||
|
mergerUtility.mergeDocuments(MemoryUsageSetting.setupMainMemoryOnly());
|
||||||
|
} catch (IOException ioe) {
|
||||||
|
logger.error("Error counting PDF pages => " + ioe);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
319
NGCC/Tess4J/src/net/sourceforge/tess4j/util/PdfGsUtilities.java
Normal file
319
NGCC/Tess4J/src/net/sourceforge/tess4j/util/PdfGsUtilities.java
Normal file
@ -0,0 +1,319 @@
|
|||||||
|
/**
|
||||||
|
* Copyright @ 2009 Quan Nguyen
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||||
|
* use this file except in compliance with the License. You may obtain a copy of
|
||||||
|
* the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations under
|
||||||
|
* the License.
|
||||||
|
*/
|
||||||
|
package net.sourceforge.tess4j.util;
|
||||||
|
|
||||||
|
import java.io.ByteArrayOutputStream;
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FilenameFilter;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.ghost4j.Ghostscript;
|
||||||
|
import org.ghost4j.GhostscriptException;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* PDF utilities based on Ghostscript.
|
||||||
|
*/
|
||||||
|
public class PdfGsUtilities {
|
||||||
|
|
||||||
|
/**
 * Installation hint text for Ghostscript; it begins with a line break.
 */
public static final String GS_INSTALL = "\nPlease download, install GPL Ghostscript from http://www.ghostscript.com\nand/or set the appropriate path variable.";
|
||||||
|
|
||||||
|
// Logger whose name is the string produced by LoggHelper.toString().
private static final org.slf4j.Logger logger = LoggerFactory.getLogger(new LoggHelper().toString());
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Converts PDF to TIFF format.
|
||||||
|
*
|
||||||
|
* @param inputPdfFile input file
|
||||||
|
* @return a multi-page TIFF image
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public static File convertPdf2Tiff(File inputPdfFile) throws IOException {
|
||||||
|
File[] pngFiles = null;
|
||||||
|
|
||||||
|
try {
|
||||||
|
pngFiles = convertPdf2Png(inputPdfFile);
|
||||||
|
File tiffFile = File.createTempFile("multipage", ".tif");
|
||||||
|
|
||||||
|
// put PNG images into a single multi-page TIFF image for return
|
||||||
|
ImageIOHelper.mergeTiff(pngFiles, tiffFile);
|
||||||
|
return tiffFile;
|
||||||
|
} finally {
|
||||||
|
if (pngFiles != null && pngFiles.length > 0) {
|
||||||
|
// get the working directory of the PNG files
|
||||||
|
File pngDirectory = new File(pngFiles[0].getParent());
|
||||||
|
// delete temporary PNG images
|
||||||
|
for (File tempFile : pngFiles) {
|
||||||
|
tempFile.delete();
|
||||||
|
}
|
||||||
|
|
||||||
|
pngDirectory.delete();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Converts PDF to PNG format.
|
||||||
|
*
|
||||||
|
* @param inputPdfFile input file
|
||||||
|
* @return an array of PNG images
|
||||||
|
* @throws java.io.IOException
|
||||||
|
*/
|
||||||
|
public synchronized static File[] convertPdf2Png(File inputPdfFile) throws IOException {
|
||||||
|
Path path = Files.createTempDirectory("tessimages");
|
||||||
|
File imageDir = path.toFile();
|
||||||
|
|
||||||
|
//get Ghostscript instance
|
||||||
|
Ghostscript gs = Ghostscript.getInstance();
|
||||||
|
|
||||||
|
//prepare Ghostscript interpreter parameters
|
||||||
|
//refer to Ghostscript documentation for parameter usage
|
||||||
|
List<String> gsArgs = new ArrayList<String>();
|
||||||
|
gsArgs.add("-gs");
|
||||||
|
gsArgs.add("-dNOPAUSE");
|
||||||
|
gsArgs.add("-dQUIET");
|
||||||
|
gsArgs.add("-dBATCH");
|
||||||
|
gsArgs.add("-dSAFER");
|
||||||
|
gsArgs.add("-sDEVICE=pnggray");
|
||||||
|
gsArgs.add("-r300");
|
||||||
|
gsArgs.add("-dGraphicsAlphaBits=4");
|
||||||
|
gsArgs.add("-dTextAlphaBits=4");
|
||||||
|
gsArgs.add("-sOutputFile=" + imageDir.getPath() + "/workingimage%04d.png");
|
||||||
|
gsArgs.add(inputPdfFile.getPath());
|
||||||
|
|
||||||
|
//execute and exit interpreter
|
||||||
|
try {
|
||||||
|
synchronized (gs) {
|
||||||
|
gs.initialize(gsArgs.toArray(new String[0]));
|
||||||
|
gs.exit();
|
||||||
|
}
|
||||||
|
} catch (UnsatisfiedLinkError e) {
|
||||||
|
logger.error(e.getMessage());
|
||||||
|
throw new RuntimeException(getMessage(e.getMessage()));
|
||||||
|
} catch (NoClassDefFoundError e) {
|
||||||
|
logger.error(e.getMessage());
|
||||||
|
throw new RuntimeException(getMessage(e.getMessage()));
|
||||||
|
} catch (GhostscriptException e) {
|
||||||
|
logger.error(e.getMessage());
|
||||||
|
throw new RuntimeException(e.getMessage());
|
||||||
|
} finally {
|
||||||
|
if (imageDir.list().length == 0) {
|
||||||
|
imageDir.delete();
|
||||||
|
}
|
||||||
|
|
||||||
|
//delete interpreter instance (safer)
|
||||||
|
try {
|
||||||
|
Ghostscript.deleteInstance();
|
||||||
|
} catch (GhostscriptException e) {
|
||||||
|
//nothing
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// find working files
|
||||||
|
File[] workingFiles = imageDir.listFiles(new FilenameFilter() {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean accept(File dir, String name) {
|
||||||
|
return name.toLowerCase().matches("workingimage\\d{4}\\.png$");
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
Arrays.sort(workingFiles, new Comparator<File>() {
|
||||||
|
@Override
|
||||||
|
public int compare(File f1, File f2) {
|
||||||
|
return f1.getName().compareTo(f2.getName());
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
return workingFiles;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Splits PDF.
|
||||||
|
*
|
||||||
|
* @param inputPdfFile input file
|
||||||
|
* @param outputPdfFile output file
|
||||||
|
* @param firstPage begin page
|
||||||
|
* @param lastPage end page
|
||||||
|
*/
|
||||||
|
public static void splitPdf(File inputPdfFile, File outputPdfFile, int firstPage, int lastPage) {
|
||||||
|
//get Ghostscript instance
|
||||||
|
Ghostscript gs = Ghostscript.getInstance();
|
||||||
|
|
||||||
|
//prepare Ghostscript interpreter parameters
|
||||||
|
//refer to Ghostscript documentation for parameter usage
|
||||||
|
//gs -sDEVICE=pdfwrite -dNOPAUSE -dQUIET -dBATCH -dFirstPage=m -dLastPage=n -sOutputFile=out.pdf in.pdf
|
||||||
|
List<String> gsArgs = new ArrayList<String>();
|
||||||
|
gsArgs.add("-gs");
|
||||||
|
gsArgs.add("-dNOPAUSE");
|
||||||
|
gsArgs.add("-dQUIET");
|
||||||
|
gsArgs.add("-dBATCH");
|
||||||
|
gsArgs.add("-sDEVICE=pdfwrite");
|
||||||
|
if (firstPage > 0) {
|
||||||
|
gsArgs.add("-dFirstPage=" + firstPage);
|
||||||
|
}
|
||||||
|
if (lastPage > 0) {
|
||||||
|
gsArgs.add("-dLastPage=" + lastPage);
|
||||||
|
}
|
||||||
|
gsArgs.add("-sOutputFile=" + outputPdfFile.getPath());
|
||||||
|
gsArgs.add(inputPdfFile.getPath());
|
||||||
|
|
||||||
|
//execute and exit interpreter
|
||||||
|
try {
|
||||||
|
synchronized (gs) {
|
||||||
|
gs.initialize(gsArgs.toArray(new String[0]));
|
||||||
|
gs.exit();
|
||||||
|
}
|
||||||
|
} catch (UnsatisfiedLinkError e) {
|
||||||
|
logger.error(e.getMessage());
|
||||||
|
throw new RuntimeException(getMessage(e.getMessage()));
|
||||||
|
} catch (NoClassDefFoundError e) {
|
||||||
|
logger.error(e.getMessage());
|
||||||
|
throw new RuntimeException(getMessage(e.getMessage()));
|
||||||
|
} catch (GhostscriptException e) {
|
||||||
|
logger.error(e.getMessage());
|
||||||
|
throw new RuntimeException(e.getMessage());
|
||||||
|
} finally {
|
||||||
|
//delete interpreter instance (safer)
|
||||||
|
try {
|
||||||
|
Ghostscript.deleteInstance();
|
||||||
|
} catch (GhostscriptException e) {
|
||||||
|
//nothing
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets PDF Page Count.
|
||||||
|
*
|
||||||
|
* @param inputPdfFile input file
|
||||||
|
* @return number of pages
|
||||||
|
*/
|
||||||
|
public static int getPdfPageCount(File inputPdfFile) {
|
||||||
|
//get Ghostscript instance
|
||||||
|
Ghostscript gs = Ghostscript.getInstance();
|
||||||
|
|
||||||
|
//prepare Ghostscript interpreter parameters
|
||||||
|
//refer to Ghostscript documentation for parameter usage
|
||||||
|
//gs -q -dNODISPLAY -c "(input.pdf) (r) file runpdfbegin pdfpagecount = quit"
|
||||||
|
List<String> gsArgs = new ArrayList<String>();
|
||||||
|
gsArgs.add("-gs");
|
||||||
|
gsArgs.add("-dNOPAUSE");
|
||||||
|
gsArgs.add("-dQUIET");
|
||||||
|
gsArgs.add("-dNODISPLAY");
|
||||||
|
gsArgs.add("-dBATCH");
|
||||||
|
gsArgs.add("-c");
|
||||||
|
String cValue = String.format("(%s) (r) file runpdfbegin pdfpagecount = quit", inputPdfFile.getPath().replace('\\', '/'));
|
||||||
|
gsArgs.add(cValue);
|
||||||
|
|
||||||
|
int pageCount = 0;
|
||||||
|
ByteArrayOutputStream os;
|
||||||
|
|
||||||
|
//execute and exit interpreter
|
||||||
|
try {
|
||||||
|
synchronized (gs) {
|
||||||
|
//output
|
||||||
|
os = new ByteArrayOutputStream();
|
||||||
|
gs.setStdOut(os);
|
||||||
|
gs.initialize(gsArgs.toArray(new String[0]));
|
||||||
|
pageCount = Integer.parseInt(os.toString().trim());
|
||||||
|
os.close();
|
||||||
|
}
|
||||||
|
} catch (UnsatisfiedLinkError e) {
|
||||||
|
logger.error(e.getMessage());
|
||||||
|
throw new RuntimeException(getMessage(e.getMessage()));
|
||||||
|
} catch (NoClassDefFoundError e) {
|
||||||
|
logger.error(e.getMessage());
|
||||||
|
throw new RuntimeException(getMessage(e.getMessage()));
|
||||||
|
} catch (GhostscriptException e) {
|
||||||
|
logger.error(e.getMessage());
|
||||||
|
throw new RuntimeException(e.getMessage());
|
||||||
|
} catch (Exception e) {
|
||||||
|
logger.error(e.getMessage());
|
||||||
|
} finally {
|
||||||
|
//delete interpreter instance (safer)
|
||||||
|
try {
|
||||||
|
Ghostscript.deleteInstance();
|
||||||
|
} catch (GhostscriptException e) {
|
||||||
|
//nothing
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return pageCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Merges PDF files.
|
||||||
|
*
|
||||||
|
* @param inputPdfFiles array of input files
|
||||||
|
* @param outputPdfFile output file
|
||||||
|
*/
|
||||||
|
public static void mergePdf(File[] inputPdfFiles, File outputPdfFile) {
|
||||||
|
//get Ghostscript instance
|
||||||
|
Ghostscript gs = Ghostscript.getInstance();
|
||||||
|
|
||||||
|
//prepare Ghostscript interpreter parameters
|
||||||
|
//refer to Ghostscript documentation for parameter usage
|
||||||
|
//gs -sDEVICE=pdfwrite -dNOPAUSE -dQUIET -dBATCH -sOutputFile=out.pdf in1.pdf in2.pdf in3.pdf
|
||||||
|
List<String> gsArgs = new ArrayList<String>();
|
||||||
|
gsArgs.add("-gs");
|
||||||
|
gsArgs.add("-dNOPAUSE");
|
||||||
|
gsArgs.add("-dQUIET");
|
||||||
|
gsArgs.add("-dBATCH");
|
||||||
|
gsArgs.add("-sDEVICE=pdfwrite");
|
||||||
|
gsArgs.add("-sOutputFile=" + outputPdfFile.getPath());
|
||||||
|
|
||||||
|
for (File inputPdfFile : inputPdfFiles) {
|
||||||
|
gsArgs.add(inputPdfFile.getPath());
|
||||||
|
}
|
||||||
|
|
||||||
|
//execute and exit interpreter
|
||||||
|
try {
|
||||||
|
synchronized (gs) {
|
||||||
|
gs.initialize(gsArgs.toArray(new String[0]));
|
||||||
|
gs.exit();
|
||||||
|
}
|
||||||
|
} catch (UnsatisfiedLinkError e) {
|
||||||
|
logger.error(e.getMessage());
|
||||||
|
throw new RuntimeException(getMessage(e.getMessage()));
|
||||||
|
} catch (NoClassDefFoundError e) {
|
||||||
|
logger.error(e.getMessage());
|
||||||
|
throw new RuntimeException(getMessage(e.getMessage()));
|
||||||
|
} catch (GhostscriptException e) {
|
||||||
|
logger.error(e.getMessage());
|
||||||
|
throw new RuntimeException(e.getMessage());
|
||||||
|
} finally {
|
||||||
|
//delete interpreter instance (safer)
|
||||||
|
try {
|
||||||
|
Ghostscript.deleteInstance();
|
||||||
|
} catch (GhostscriptException e) {
|
||||||
|
//nothing
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static String getMessage(String message) {
|
||||||
|
if (message.contains("library 'gs") || message.contains("ghost4j")) {
|
||||||
|
return message + GS_INSTALL;
|
||||||
|
}
|
||||||
|
return message;
|
||||||
|
}
|
||||||
|
}
|
163
NGCC/Tess4J/src/net/sourceforge/tess4j/util/PdfUtilities.java
Normal file
163
NGCC/Tess4J/src/net/sourceforge/tess4j/util/PdfUtilities.java
Normal file
@ -0,0 +1,163 @@
|
|||||||
|
/**
|
||||||
|
* Copyright @ 2009 Quan Nguyen
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||||
|
* use this file except in compliance with the License. You may obtain a copy of
|
||||||
|
* the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations under
|
||||||
|
* the License.
|
||||||
|
*/
|
||||||
|
package net.sourceforge.tess4j.util;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* PDF utilities based on Ghostscript or PDFBox with Ghostscript as default. If
|
||||||
|
* Ghostscript is not available on the system, then PDFBox is used. Call
|
||||||
|
* <code>System.setProperty(PDF_LIBRARY, PDFBOX);</code> to set PDFBox as
|
||||||
|
* default.
|
||||||
|
*/
|
||||||
|
public class PdfUtilities {
|
||||||
|
|
||||||
|
public static final String PDF_LIBRARY = "pdf.library";
|
||||||
|
public static final String PDFBOX = "pdfbox";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Converts PDF to TIFF format.
|
||||||
|
*
|
||||||
|
* @param inputPdfFile input file
|
||||||
|
* @return a multi-page TIFF image
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public static File convertPdf2Tiff(File inputPdfFile) throws IOException {
|
||||||
|
if (PDFBOX.equals(System.getProperty(PDF_LIBRARY))) {
|
||||||
|
return PdfBoxUtilities.convertPdf2Tiff(inputPdfFile);
|
||||||
|
} else {
|
||||||
|
try {
|
||||||
|
return PdfGsUtilities.convertPdf2Tiff(inputPdfFile);
|
||||||
|
} catch (Exception e) {
|
||||||
|
System.setProperty(PDF_LIBRARY, PDFBOX);
|
||||||
|
return convertPdf2Tiff(inputPdfFile);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Converts PDF to PNG format.
|
||||||
|
*
|
||||||
|
* @param inputPdfFile input file
|
||||||
|
* @return an array of PNG images
|
||||||
|
* @throws java.io.IOException
|
||||||
|
*/
|
||||||
|
public static File[] convertPdf2Png(File inputPdfFile) throws IOException {
|
||||||
|
if (PDFBOX.equals(System.getProperty(PDF_LIBRARY))) {
|
||||||
|
return PdfBoxUtilities.convertPdf2Png(inputPdfFile);
|
||||||
|
} else {
|
||||||
|
try {
|
||||||
|
return PdfGsUtilities.convertPdf2Png(inputPdfFile);
|
||||||
|
} catch (Exception e) {
|
||||||
|
System.setProperty(PDF_LIBRARY, PDFBOX);
|
||||||
|
return convertPdf2Png(inputPdfFile);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Splits PDF.
|
||||||
|
*
|
||||||
|
* @deprecated As of Release 3.0.
|
||||||
|
*
|
||||||
|
* @param inputPdfFile input file
|
||||||
|
* @param outputPdfFile output file
|
||||||
|
* @param firstPage begin page
|
||||||
|
* @param lastPage end page
|
||||||
|
*/
|
||||||
|
public static void splitPdf(String inputPdfFile, String outputPdfFile, String firstPage, String lastPage) {
|
||||||
|
if (firstPage.trim().isEmpty()) {
|
||||||
|
firstPage = "0";
|
||||||
|
}
|
||||||
|
if (lastPage.trim().isEmpty()) {
|
||||||
|
lastPage = "0";
|
||||||
|
}
|
||||||
|
|
||||||
|
splitPdf(new File(inputPdfFile), new File(outputPdfFile), Integer.parseInt(firstPage), Integer.parseInt(lastPage));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Splits PDF.
|
||||||
|
*
|
||||||
|
* @param inputPdfFile input file
|
||||||
|
* @param outputPdfFile output file
|
||||||
|
* @param firstPage begin page
|
||||||
|
* @param lastPage end page
|
||||||
|
*/
|
||||||
|
public static void splitPdf(File inputPdfFile, File outputPdfFile, int firstPage, int lastPage) {
|
||||||
|
if (PDFBOX.equals(System.getProperty(PDF_LIBRARY))) {
|
||||||
|
PdfBoxUtilities.splitPdf(inputPdfFile, outputPdfFile, firstPage, lastPage);
|
||||||
|
} else {
|
||||||
|
try {
|
||||||
|
PdfGsUtilities.splitPdf(inputPdfFile, outputPdfFile, firstPage, lastPage);
|
||||||
|
} catch (Exception e) {
|
||||||
|
System.setProperty(PDF_LIBRARY, PDFBOX);
|
||||||
|
splitPdf(inputPdfFile, outputPdfFile, firstPage, lastPage);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets PDF Page Count.
|
||||||
|
*
|
||||||
|
* @deprecated As of Release 3.0.
|
||||||
|
*
|
||||||
|
* @param inputPdfFile input file
|
||||||
|
* @return number of pages
|
||||||
|
*/
|
||||||
|
public static int getPdfPageCount(String inputPdfFile) {
|
||||||
|
return getPdfPageCount(new File(inputPdfFile));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets PDF Page Count.
|
||||||
|
*
|
||||||
|
* @param inputPdfFile input file
|
||||||
|
* @return number of pages
|
||||||
|
*/
|
||||||
|
public static int getPdfPageCount(File inputPdfFile) {
|
||||||
|
if (PDFBOX.equals(System.getProperty(PDF_LIBRARY))) {
|
||||||
|
return PdfBoxUtilities.getPdfPageCount(inputPdfFile);
|
||||||
|
} else {
|
||||||
|
try {
|
||||||
|
return PdfGsUtilities.getPdfPageCount(inputPdfFile);
|
||||||
|
} catch (Exception e) {
|
||||||
|
System.setProperty(PDF_LIBRARY, PDFBOX);
|
||||||
|
return getPdfPageCount(inputPdfFile);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Merges PDF files.
|
||||||
|
*
|
||||||
|
* @param inputPdfFiles array of input files
|
||||||
|
* @param outputPdfFile output file
|
||||||
|
*/
|
||||||
|
public static void mergePdf(File[] inputPdfFiles, File outputPdfFile) {
|
||||||
|
if (PDFBOX.equals(System.getProperty(PDF_LIBRARY))) {
|
||||||
|
PdfBoxUtilities.mergePdf(inputPdfFiles, outputPdfFile);
|
||||||
|
} else {
|
||||||
|
try {
|
||||||
|
PdfGsUtilities.mergePdf(inputPdfFiles, outputPdfFile);
|
||||||
|
} catch (Exception e) {
|
||||||
|
System.setProperty(PDF_LIBRARY, PDFBOX);
|
||||||
|
mergePdf(inputPdfFiles, outputPdfFile);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
74
NGCC/Tess4J/src/net/sourceforge/tess4j/util/Utils.java
Normal file
74
NGCC/Tess4J/src/net/sourceforge/tess4j/util/Utils.java
Normal file
@ -0,0 +1,74 @@
|
|||||||
|
/*
|
||||||
|
* Copyright @ 2013 Quan Nguyen
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package net.sourceforge.tess4j.util;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileOutputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.lang.reflect.Field;
|
||||||
|
import java.lang.reflect.Modifier;
|
||||||
|
|
||||||
|
/**
 * Small file and reflection helpers.
 */
public class Utils {

    /**
     * Writes byte array to file, creating the parent directories first when
     * the file has a parent path.
     *
     * @param data byte array
     * @param outFile output file
     * @throws IOException if the file cannot be opened, written or closed
     */
    public static void writeFile(byte[] data, File outFile) throws IOException {
        FileOutputStream fos = null;

        try {
            // create parent dirs when necessary
            File parentDir = outFile.getParentFile();
            if (parentDir != null) {
                parentDir.mkdirs();
            }

            fos = new FileOutputStream(outFile);
            fos.write(data);
        } finally {
            // always release the stream, even when the write failed
            if (fos != null) {
                fos.close();
            }
        }
    }

    /**
     * Gets user-friendly name of the public static final constant defined in a
     * class or an interface for display purpose.
     *
     * Only fields declared directly by <code>c</code> are examined; the name
     * of the first one whose value equals <code>value</code> is returned.
     *
     * @param value the constant value
     * @param c type of class or interface
     * @return name of the matching constant, or
     * <code>String.valueOf(value)</code> when no constant matches or a field
     * cannot be read
     */
    public static String getConstantName(Object value, Class<?> c) {
        for (Field f : c.getDeclaredFields()) {
            int mod = f.getModifiers();
            if (Modifier.isStatic(mod) && Modifier.isPublic(mod) && Modifier.isFinal(mod)) {
                try {
                    Object constant = f.get(null);
                    // null-safe comparison: a constant may legitimately hold null
                    if (constant == null ? value == null : constant.equals(value)) {
                        return f.getName();
                    }
                } catch (IllegalAccessException e) {
                    return String.valueOf(value);
                }
            }
        }
        return String.valueOf(value);
    }
}
|
1
NGCC/Tess4J/tessdata/configs/api_config
Normal file
1
NGCC/Tess4J/tessdata/configs/api_config
Normal file
@ -0,0 +1 @@
|
|||||||
|
tessedit_zero_rejection T
|
1
NGCC/Tess4J/tessdata/configs/digits
Normal file
1
NGCC/Tess4J/tessdata/configs/digits
Normal file
@ -0,0 +1 @@
|
|||||||
|
tessedit_char_whitelist 0123456789-.
|
1
NGCC/Tess4J/tessdata/configs/hocr
Normal file
1
NGCC/Tess4J/tessdata/configs/hocr
Normal file
@ -0,0 +1 @@
|
|||||||
|
tessedit_create_hocr 1
|
BIN
NGCC/Tess4J/tessdata/eng.traineddata
Normal file
BIN
NGCC/Tess4J/tessdata/eng.traineddata
Normal file
Binary file not shown.
BIN
NGCC/Tess4J/tessdata/osd.traineddata
Normal file
BIN
NGCC/Tess4J/tessdata/osd.traineddata
Normal file
Binary file not shown.
BIN
NGCC/Tess4J/tessdata/pdf.ttf
Normal file
BIN
NGCC/Tess4J/tessdata/pdf.ttf
Normal file
Binary file not shown.
793
NGCC/Tess4J/tessdata/pdf.ttx
Normal file
793
NGCC/Tess4J/tessdata/pdf.ttx
Normal file
@ -0,0 +1,793 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<ttFont sfntVersion="\x00\x01\x00\x00" ttLibVersion="2.4">
|
||||||
|
|
||||||
|
<GlyphOrder>
|
||||||
|
<!-- The 'id' attribute is only for humans; it is ignored when parsed. -->
|
||||||
|
<GlyphID id="0" name=".notdef"/>
|
||||||
|
<GlyphID id="1" name="glyph00001"/>
|
||||||
|
<GlyphID id="2" name="glyph00002"/>
|
||||||
|
<GlyphID id="3" name="glyph00003"/>
|
||||||
|
<GlyphID id="4" name="glyph00004"/>
|
||||||
|
<GlyphID id="5" name="glyph00005"/>
|
||||||
|
<GlyphID id="6" name="glyph00006"/>
|
||||||
|
<GlyphID id="7" name="glyph00007"/>
|
||||||
|
<GlyphID id="8" name="glyph00008"/>
|
||||||
|
<GlyphID id="9" name="glyph00009"/>
|
||||||
|
<GlyphID id="10" name="glyph00010"/>
|
||||||
|
<GlyphID id="11" name="glyph00011"/>
|
||||||
|
<GlyphID id="12" name="glyph00012"/>
|
||||||
|
<GlyphID id="13" name="glyph00013"/>
|
||||||
|
<GlyphID id="14" name="glyph00014"/>
|
||||||
|
<GlyphID id="15" name="glyph00015"/>
|
||||||
|
<GlyphID id="16" name="glyph00016"/>
|
||||||
|
<GlyphID id="17" name="glyph00017"/>
|
||||||
|
<GlyphID id="18" name="glyph00018"/>
|
||||||
|
<GlyphID id="19" name="glyph00019"/>
|
||||||
|
<GlyphID id="20" name="glyph00020"/>
|
||||||
|
<GlyphID id="21" name="glyph00021"/>
|
||||||
|
<GlyphID id="22" name="glyph00022"/>
|
||||||
|
<GlyphID id="23" name="glyph00023"/>
|
||||||
|
<GlyphID id="24" name="glyph00024"/>
|
||||||
|
<GlyphID id="25" name="glyph00025"/>
|
||||||
|
<GlyphID id="26" name="glyph00026"/>
|
||||||
|
<GlyphID id="27" name="glyph00027"/>
|
||||||
|
<GlyphID id="28" name="glyph00028"/>
|
||||||
|
<GlyphID id="29" name="glyph00029"/>
|
||||||
|
<GlyphID id="30" name="glyph00030"/>
|
||||||
|
<GlyphID id="31" name="glyph00031"/>
|
||||||
|
<GlyphID id="32" name="glyph00032"/>
|
||||||
|
<GlyphID id="33" name="glyph00033"/>
|
||||||
|
<GlyphID id="34" name="glyph00034"/>
|
||||||
|
<GlyphID id="35" name="glyph00035"/>
|
||||||
|
<GlyphID id="36" name="glyph00036"/>
|
||||||
|
<GlyphID id="37" name="glyph00037"/>
|
||||||
|
<GlyphID id="38" name="glyph00038"/>
|
||||||
|
<GlyphID id="39" name="glyph00039"/>
|
||||||
|
<GlyphID id="40" name="glyph00040"/>
|
||||||
|
<GlyphID id="41" name="glyph00041"/>
|
||||||
|
<GlyphID id="42" name="glyph00042"/>
|
||||||
|
<GlyphID id="43" name="glyph00043"/>
|
||||||
|
<GlyphID id="44" name="glyph00044"/>
|
||||||
|
<GlyphID id="45" name="glyph00045"/>
|
||||||
|
<GlyphID id="46" name="glyph00046"/>
|
||||||
|
<GlyphID id="47" name="glyph00047"/>
|
||||||
|
<GlyphID id="48" name="glyph00048"/>
|
||||||
|
<GlyphID id="49" name="glyph00049"/>
|
||||||
|
<GlyphID id="50" name="glyph00050"/>
|
||||||
|
<GlyphID id="51" name="glyph00051"/>
|
||||||
|
<GlyphID id="52" name="glyph00052"/>
|
||||||
|
<GlyphID id="53" name="glyph00053"/>
|
||||||
|
<GlyphID id="54" name="glyph00054"/>
|
||||||
|
<GlyphID id="55" name="glyph00055"/>
|
||||||
|
<GlyphID id="56" name="glyph00056"/>
|
||||||
|
<GlyphID id="57" name="glyph00057"/>
|
||||||
|
<GlyphID id="58" name="glyph00058"/>
|
||||||
|
<GlyphID id="59" name="glyph00059"/>
|
||||||
|
<GlyphID id="60" name="glyph00060"/>
|
||||||
|
<GlyphID id="61" name="glyph00061"/>
|
||||||
|
<GlyphID id="62" name="glyph00062"/>
|
||||||
|
<GlyphID id="63" name="glyph00063"/>
|
||||||
|
<GlyphID id="64" name="glyph00064"/>
|
||||||
|
<GlyphID id="65" name="glyph00065"/>
|
||||||
|
<GlyphID id="66" name="glyph00066"/>
|
||||||
|
<GlyphID id="67" name="glyph00067"/>
|
||||||
|
<GlyphID id="68" name="glyph00068"/>
|
||||||
|
<GlyphID id="69" name="glyph00069"/>
|
||||||
|
<GlyphID id="70" name="glyph00070"/>
|
||||||
|
<GlyphID id="71" name="glyph00071"/>
|
||||||
|
<GlyphID id="72" name="glyph00072"/>
|
||||||
|
<GlyphID id="73" name="glyph00073"/>
|
||||||
|
<GlyphID id="74" name="glyph00074"/>
|
||||||
|
<GlyphID id="75" name="glyph00075"/>
|
||||||
|
<GlyphID id="76" name="glyph00076"/>
|
||||||
|
<GlyphID id="77" name="glyph00077"/>
|
||||||
|
<GlyphID id="78" name="glyph00078"/>
|
||||||
|
<GlyphID id="79" name="glyph00079"/>
|
||||||
|
<GlyphID id="80" name="glyph00080"/>
|
||||||
|
<GlyphID id="81" name="glyph00081"/>
|
||||||
|
<GlyphID id="82" name="glyph00082"/>
|
||||||
|
<GlyphID id="83" name="glyph00083"/>
|
||||||
|
<GlyphID id="84" name="glyph00084"/>
|
||||||
|
<GlyphID id="85" name="glyph00085"/>
|
||||||
|
<GlyphID id="86" name="glyph00086"/>
|
||||||
|
<GlyphID id="87" name="glyph00087"/>
|
||||||
|
<GlyphID id="88" name="glyph00088"/>
|
||||||
|
<GlyphID id="89" name="glyph00089"/>
|
||||||
|
<GlyphID id="90" name="glyph00090"/>
|
||||||
|
<GlyphID id="91" name="glyph00091"/>
|
||||||
|
<GlyphID id="92" name="glyph00092"/>
|
||||||
|
<GlyphID id="93" name="glyph00093"/>
|
||||||
|
<GlyphID id="94" name="glyph00094"/>
|
||||||
|
<GlyphID id="95" name="glyph00095"/>
|
||||||
|
<GlyphID id="96" name="glyph00096"/>
|
||||||
|
<GlyphID id="97" name="glyph00097"/>
|
||||||
|
<GlyphID id="98" name="glyph00098"/>
|
||||||
|
<GlyphID id="99" name="glyph00099"/>
|
||||||
|
<GlyphID id="100" name="glyph00100"/>
|
||||||
|
<GlyphID id="101" name="glyph00101"/>
|
||||||
|
<GlyphID id="102" name="glyph00102"/>
|
||||||
|
<GlyphID id="103" name="glyph00103"/>
|
||||||
|
<GlyphID id="104" name="glyph00104"/>
|
||||||
|
<GlyphID id="105" name="glyph00105"/>
|
||||||
|
<GlyphID id="106" name="glyph00106"/>
|
||||||
|
<GlyphID id="107" name="glyph00107"/>
|
||||||
|
<GlyphID id="108" name="glyph00108"/>
|
||||||
|
<GlyphID id="109" name="glyph00109"/>
|
||||||
|
<GlyphID id="110" name="glyph00110"/>
|
||||||
|
<GlyphID id="111" name="glyph00111"/>
|
||||||
|
<GlyphID id="112" name="glyph00112"/>
|
||||||
|
<GlyphID id="113" name="glyph00113"/>
|
||||||
|
<GlyphID id="114" name="glyph00114"/>
|
||||||
|
<GlyphID id="115" name="glyph00115"/>
|
||||||
|
<GlyphID id="116" name="glyph00116"/>
|
||||||
|
</GlyphOrder>
|
||||||
|
|
||||||
|
<head>
|
||||||
|
<!-- Most of this table will be recalculated by the compiler -->
|
||||||
|
<tableVersion value="1.0"/>
|
||||||
|
<fontRevision value="2.31999206543"/>
|
||||||
|
<checkSumAdjustment value="0xd4fdc458"/>
|
||||||
|
<magicNumber value="0x5f0f3cf5"/>
|
||||||
|
<flags value="00000000 00011111"/>
|
||||||
|
<unitsPerEm value="2048"/>
|
||||||
|
<created value="Fri Sep 10 06:45:17 2010"/>
|
||||||
|
<modified value="Fri Sep 10 06:45:17 2010"/>
|
||||||
|
<xMin value="0"/>
|
||||||
|
<yMin value="0"/>
|
||||||
|
<xMax value="1000"/>
|
||||||
|
<yMax value="1000"/>
|
||||||
|
<macStyle value="00000000 00000000"/>
|
||||||
|
<lowestRecPPEM value="8"/>
|
||||||
|
<fontDirectionHint value="0"/>
|
||||||
|
<indexToLocFormat value="1"/>
|
||||||
|
<glyphDataFormat value="0"/>
|
||||||
|
</head>
|
||||||
|
|
||||||
|
<hhea>
|
||||||
|
<tableVersion value="1.0"/>
|
||||||
|
<ascent value="1000"/>
|
||||||
|
<descent value="0"/>
|
||||||
|
<lineGap value="0"/>
|
||||||
|
<advanceWidthMax value="3554"/>
|
||||||
|
<minLeftSideBearing value="-2090"/>
|
||||||
|
<minRightSideBearing value="-1455"/>
|
||||||
|
<xMaxExtent value="3442"/>
|
||||||
|
<caretSlopeRise value="1"/>
|
||||||
|
<caretSlopeRun value="0"/>
|
||||||
|
<caretOffset value="0"/>
|
||||||
|
<reserved0 value="0"/>
|
||||||
|
<reserved1 value="0"/>
|
||||||
|
<reserved2 value="0"/>
|
||||||
|
<reserved3 value="0"/>
|
||||||
|
<metricDataFormat value="0"/>
|
||||||
|
<numberOfHMetrics value="117"/>
|
||||||
|
</hhea>
|
||||||
|
|
||||||
|
<maxp>
|
||||||
|
<!-- Most of this table will be recalculated by the compiler -->
|
||||||
|
<tableVersion value="0x10000"/>
|
||||||
|
<numGlyphs value="117"/>
|
||||||
|
<maxPoints value="852"/>
|
||||||
|
<maxContours value="43"/>
|
||||||
|
<maxCompositePoints value="104"/>
|
||||||
|
<maxCompositeContours value="12"/>
|
||||||
|
<maxZones value="2"/>
|
||||||
|
<maxTwilightPoints value="16"/>
|
||||||
|
<maxStorage value="153"/>
|
||||||
|
<maxFunctionDefs value="8"/>
|
||||||
|
<maxInstructionDefs value="0"/>
|
||||||
|
<maxStackElements value="1045"/>
|
||||||
|
<maxSizeOfInstructions value="534"/>
|
||||||
|
<maxComponentElements value="8"/>
|
||||||
|
<maxComponentDepth value="4"/>
|
||||||
|
</maxp>
|
||||||
|
|
||||||
|
<hmtx>
|
||||||
|
<mtx name=".notdef" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00001" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00002" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00003" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00004" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00005" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00006" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00007" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00008" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00009" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00010" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00011" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00012" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00013" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00014" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00015" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00016" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00017" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00018" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00019" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00020" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00021" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00022" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00023" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00024" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00025" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00026" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00027" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00028" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00029" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00030" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00031" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00032" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00033" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00034" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00035" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00036" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00037" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00038" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00039" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00040" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00041" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00042" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00043" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00044" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00045" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00046" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00047" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00048" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00049" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00050" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00051" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00052" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00053" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00054" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00055" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00056" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00057" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00058" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00059" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00060" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00061" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00062" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00063" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00064" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00065" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00066" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00067" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00068" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00069" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00070" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00071" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00072" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00073" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00074" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00075" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00076" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00077" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00078" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00079" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00080" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00081" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00082" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00083" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00084" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00085" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00086" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00087" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00088" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00089" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00090" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00091" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00092" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00093" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00094" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00095" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00096" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00097" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00098" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00099" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00100" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00101" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00102" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00103" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00104" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00105" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00106" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00107" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00108" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00109" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00110" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00111" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00112" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00113" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00114" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00115" width="1000" lsb="0"/>
|
||||||
|
<mtx name="glyph00116" width="1000" lsb="0"/>
|
||||||
|
</hmtx>
|
||||||
|
|
||||||
|
<cmap>
|
||||||
|
<tableVersion version="0"/>
|
||||||
|
<cmap_format_4 platformID="3" platEncID="0" language="0">
|
||||||
|
<map code="0xf000" name=".notdef"/>
|
||||||
|
<map code="0xf001" name="glyph00001"/>
|
||||||
|
<map code="0xf002" name="glyph00002"/>
|
||||||
|
<map code="0xf003" name="glyph00003"/>
|
||||||
|
<map code="0xf004" name="glyph00004"/>
|
||||||
|
<map code="0xf005" name="glyph00005"/>
|
||||||
|
<map code="0xf006" name="glyph00006"/>
|
||||||
|
<map code="0xf007" name="glyph00007"/>
|
||||||
|
<map code="0xf008" name="glyph00008"/>
|
||||||
|
<map code="0xf009" name="glyph00009"/>
|
||||||
|
<map code="0xf00a" name="glyph00010"/>
|
||||||
|
<map code="0xf00b" name="glyph00011"/>
|
||||||
|
<map code="0xf00c" name="glyph00012"/>
|
||||||
|
<map code="0xf00d" name="glyph00013"/>
|
||||||
|
<map code="0xf00e" name="glyph00014"/>
|
||||||
|
<map code="0xf00f" name="glyph00015"/>
|
||||||
|
<map code="0xf010" name="glyph00016"/>
|
||||||
|
<map code="0xf011" name="glyph00017"/>
|
||||||
|
<map code="0xf012" name="glyph00018"/>
|
||||||
|
<map code="0xf013" name="glyph00019"/>
|
||||||
|
<map code="0xf014" name="glyph00020"/>
|
||||||
|
<map code="0xf015" name="glyph00021"/>
|
||||||
|
<map code="0xf016" name="glyph00022"/>
|
||||||
|
<map code="0xf017" name="glyph00023"/>
|
||||||
|
<map code="0xf018" name="glyph00024"/>
|
||||||
|
<map code="0xf019" name="glyph00025"/>
|
||||||
|
<map code="0xf01a" name="glyph00026"/>
|
||||||
|
<map code="0xf01b" name="glyph00027"/>
|
||||||
|
<map code="0xf01c" name="glyph00028"/>
|
||||||
|
<map code="0xf01d" name="glyph00029"/>
|
||||||
|
<map code="0xf01e" name="glyph00030"/>
|
||||||
|
<map code="0xf01f" name="glyph00031"/>
|
||||||
|
<map code="0xf020" name="glyph00032"/>
|
||||||
|
<map code="0xf021" name="glyph00033"/>
|
||||||
|
<map code="0xf022" name="glyph00034"/>
|
||||||
|
<map code="0xf023" name="glyph00035"/>
|
||||||
|
<map code="0xf024" name="glyph00036"/>
|
||||||
|
<map code="0xf025" name="glyph00037"/>
|
||||||
|
<map code="0xf026" name="glyph00038"/>
|
||||||
|
<map code="0xf027" name="glyph00039"/>
|
||||||
|
<map code="0xf028" name="glyph00040"/>
|
||||||
|
<map code="0xf029" name="glyph00041"/>
|
||||||
|
<map code="0xf02a" name="glyph00042"/>
|
||||||
|
<map code="0xf02b" name="glyph00043"/>
|
||||||
|
<map code="0xf02c" name="glyph00044"/>
|
||||||
|
<map code="0xf02d" name="glyph00045"/>
|
||||||
|
<map code="0xf02e" name="glyph00046"/>
|
||||||
|
<map code="0xf02f" name="glyph00047"/>
|
||||||
|
<map code="0xf030" name="glyph00048"/>
|
||||||
|
<map code="0xf031" name="glyph00049"/>
|
||||||
|
<map code="0xf032" name="glyph00050"/>
|
||||||
|
<map code="0xf033" name="glyph00051"/>
|
||||||
|
<map code="0xf034" name="glyph00052"/>
|
||||||
|
<map code="0xf035" name="glyph00053"/>
|
||||||
|
<map code="0xf036" name="glyph00054"/>
|
||||||
|
<map code="0xf037" name="glyph00055"/>
|
||||||
|
<map code="0xf038" name="glyph00056"/>
|
||||||
|
<map code="0xf039" name="glyph00057"/>
|
||||||
|
<map code="0xf03a" name="glyph00058"/>
|
||||||
|
<map code="0xf03b" name="glyph00059"/>
|
||||||
|
<map code="0xf03c" name="glyph00060"/>
|
||||||
|
<map code="0xf03d" name="glyph00061"/>
|
||||||
|
<map code="0xf03e" name="glyph00062"/>
|
||||||
|
<map code="0xf03f" name="glyph00063"/>
|
||||||
|
<map code="0xf040" name="glyph00064"/>
|
||||||
|
<map code="0xf041" name="glyph00065"/>
|
||||||
|
<map code="0xf042" name="glyph00066"/>
|
||||||
|
<map code="0xf043" name="glyph00067"/>
|
||||||
|
<map code="0xf044" name="glyph00068"/>
|
||||||
|
<map code="0xf045" name="glyph00069"/>
|
||||||
|
<map code="0xf046" name="glyph00070"/>
|
||||||
|
<map code="0xf047" name="glyph00071"/>
|
||||||
|
<map code="0xf048" name="glyph00072"/>
|
||||||
|
<map code="0xf049" name="glyph00073"/>
|
||||||
|
<map code="0xf04a" name="glyph00074"/>
|
||||||
|
<map code="0xf04b" name="glyph00075"/>
|
||||||
|
<map code="0xf04c" name="glyph00076"/>
|
||||||
|
<map code="0xf04d" name="glyph00077"/>
|
||||||
|
<map code="0xf04e" name="glyph00078"/>
|
||||||
|
<map code="0xf04f" name="glyph00079"/>
|
||||||
|
<map code="0xf050" name="glyph00080"/>
|
||||||
|
<map code="0xf051" name="glyph00081"/>
|
||||||
|
<map code="0xf052" name="glyph00082"/>
|
||||||
|
<map code="0xf053" name="glyph00083"/>
|
||||||
|
<map code="0xf054" name="glyph00084"/>
|
||||||
|
<map code="0xf055" name="glyph00085"/>
|
||||||
|
<map code="0xf056" name="glyph00086"/>
|
||||||
|
<map code="0xf057" name="glyph00087"/>
|
||||||
|
<map code="0xf058" name="glyph00088"/>
|
||||||
|
<map code="0xf059" name="glyph00089"/>
|
||||||
|
<map code="0xf05a" name="glyph00090"/>
|
||||||
|
<map code="0xf05b" name="glyph00091"/>
|
||||||
|
<map code="0xf05c" name="glyph00092"/>
|
||||||
|
<map code="0xf05d" name="glyph00093"/>
|
||||||
|
<map code="0xf05e" name="glyph00094"/>
|
||||||
|
<map code="0xf05f" name="glyph00095"/>
|
||||||
|
<map code="0xf060" name="glyph00096"/>
|
||||||
|
<map code="0xf061" name="glyph00097"/>
|
||||||
|
<map code="0xf062" name="glyph00098"/>
|
||||||
|
<map code="0xf063" name="glyph00099"/>
|
||||||
|
<map code="0xf064" name="glyph00100"/>
|
||||||
|
<map code="0xf065" name="glyph00101"/>
|
||||||
|
<map code="0xf066" name="glyph00102"/>
|
||||||
|
<map code="0xf067" name="glyph00103"/>
|
||||||
|
<map code="0xf068" name="glyph00104"/>
|
||||||
|
<map code="0xf069" name="glyph00105"/>
|
||||||
|
<map code="0xf06a" name="glyph00106"/>
|
||||||
|
<map code="0xf06b" name="glyph00107"/>
|
||||||
|
<map code="0xf06c" name="glyph00108"/>
|
||||||
|
<map code="0xf06d" name="glyph00109"/>
|
||||||
|
<map code="0xf06e" name="glyph00110"/>
|
||||||
|
<map code="0xf06f" name="glyph00111"/>
|
||||||
|
<map code="0xf070" name="glyph00112"/>
|
||||||
|
<map code="0xf071" name="glyph00113"/>
|
||||||
|
<map code="0xf072" name="glyph00114"/>
|
||||||
|
<map code="0xf073" name="glyph00115"/>
|
||||||
|
<map code="0xf074" name="glyph00116"/>
|
||||||
|
</cmap_format_4>
|
||||||
|
<cmap_format_6 platformID="1" platEncID="0" language="0">
|
||||||
|
<map code="0x0" name=".notdef"/>
|
||||||
|
<map code="0x1" name="glyph00001"/>
|
||||||
|
<map code="0x2" name="glyph00002"/>
|
||||||
|
<map code="0x3" name="glyph00003"/>
|
||||||
|
<map code="0x4" name="glyph00004"/>
|
||||||
|
<map code="0x5" name="glyph00005"/>
|
||||||
|
<map code="0x6" name="glyph00006"/>
|
||||||
|
<map code="0x7" name="glyph00007"/>
|
||||||
|
<map code="0x8" name="glyph00008"/>
|
||||||
|
<map code="0x9" name="glyph00009"/>
|
||||||
|
<map code="0xa" name="glyph00010"/>
|
||||||
|
<map code="0xb" name="glyph00011"/>
|
||||||
|
<map code="0xc" name="glyph00012"/>
|
||||||
|
<map code="0xd" name="glyph00013"/>
|
||||||
|
<map code="0xe" name="glyph00014"/>
|
||||||
|
<map code="0xf" name="glyph00015"/>
|
||||||
|
<map code="0x10" name="glyph00016"/>
|
||||||
|
<map code="0x11" name="glyph00017"/>
|
||||||
|
<map code="0x12" name="glyph00018"/>
|
||||||
|
<map code="0x13" name="glyph00019"/>
|
||||||
|
<map code="0x14" name="glyph00020"/>
|
||||||
|
<map code="0x15" name="glyph00021"/>
|
||||||
|
<map code="0x16" name="glyph00022"/>
|
||||||
|
<map code="0x17" name="glyph00023"/>
|
||||||
|
<map code="0x18" name="glyph00024"/>
|
||||||
|
<map code="0x19" name="glyph00025"/>
|
||||||
|
<map code="0x1a" name="glyph00026"/>
|
||||||
|
<map code="0x1b" name="glyph00027"/>
|
||||||
|
<map code="0x1c" name="glyph00028"/>
|
||||||
|
<map code="0x1d" name="glyph00029"/>
|
||||||
|
<map code="0x1e" name="glyph00030"/>
|
||||||
|
<map code="0x1f" name="glyph00031"/>
|
||||||
|
<map code="0x20" name="glyph00032"/>
|
||||||
|
<map code="0x21" name="glyph00033"/>
|
||||||
|
<map code="0x22" name="glyph00034"/>
|
||||||
|
<map code="0x23" name="glyph00035"/>
|
||||||
|
<map code="0x24" name="glyph00036"/>
|
||||||
|
<map code="0x25" name="glyph00037"/>
|
||||||
|
<map code="0x26" name="glyph00038"/>
|
||||||
|
<map code="0x27" name="glyph00039"/>
|
||||||
|
<map code="0x28" name="glyph00040"/>
|
||||||
|
<map code="0x29" name="glyph00041"/>
|
||||||
|
<map code="0x2a" name="glyph00042"/>
|
||||||
|
<map code="0x2b" name="glyph00043"/>
|
||||||
|
<map code="0x2c" name="glyph00044"/>
|
||||||
|
<map code="0x2d" name="glyph00045"/>
|
||||||
|
<map code="0x2e" name="glyph00046"/>
|
||||||
|
<map code="0x2f" name="glyph00047"/>
|
||||||
|
<map code="0x30" name="glyph00048"/>
|
||||||
|
<map code="0x31" name="glyph00049"/>
|
||||||
|
<map code="0x32" name="glyph00050"/>
|
||||||
|
<map code="0x33" name="glyph00051"/>
|
||||||
|
<map code="0x34" name="glyph00052"/>
|
||||||
|
<map code="0x35" name="glyph00053"/>
|
||||||
|
<map code="0x36" name="glyph00054"/>
|
||||||
|
<map code="0x37" name="glyph00055"/>
|
||||||
|
<map code="0x38" name="glyph00056"/>
|
||||||
|
<map code="0x39" name="glyph00057"/>
|
||||||
|
<map code="0x3a" name="glyph00058"/>
|
||||||
|
<map code="0x3b" name="glyph00059"/>
|
||||||
|
<map code="0x3c" name="glyph00060"/>
|
||||||
|
<map code="0x3d" name="glyph00061"/>
|
||||||
|
<map code="0x3e" name="glyph00062"/>
|
||||||
|
<map code="0x3f" name="glyph00063"/>
|
||||||
|
<map code="0x40" name="glyph00064"/>
|
||||||
|
<map code="0x41" name="glyph00065"/>
|
||||||
|
<map code="0x42" name="glyph00066"/>
|
||||||
|
<map code="0x43" name="glyph00067"/>
|
||||||
|
<map code="0x44" name="glyph00068"/>
|
||||||
|
<map code="0x45" name="glyph00069"/>
|
||||||
|
<map code="0x46" name="glyph00070"/>
|
||||||
|
<map code="0x47" name="glyph00071"/>
|
||||||
|
<map code="0x48" name="glyph00072"/>
|
||||||
|
<map code="0x49" name="glyph00073"/>
|
||||||
|
<map code="0x4a" name="glyph00074"/>
|
||||||
|
<map code="0x4b" name="glyph00075"/>
|
||||||
|
<map code="0x4c" name="glyph00076"/>
|
||||||
|
<map code="0x4d" name="glyph00077"/>
|
||||||
|
<map code="0x4e" name="glyph00078"/>
|
||||||
|
<map code="0x4f" name="glyph00079"/>
|
||||||
|
<map code="0x50" name="glyph00080"/>
|
||||||
|
<map code="0x51" name="glyph00081"/>
|
||||||
|
<map code="0x52" name="glyph00082"/>
|
||||||
|
<map code="0x53" name="glyph00083"/>
|
||||||
|
<map code="0x54" name="glyph00084"/>
|
||||||
|
<map code="0x55" name="glyph00085"/>
|
||||||
|
<map code="0x56" name="glyph00086"/>
|
||||||
|
<map code="0x57" name="glyph00087"/>
|
||||||
|
<map code="0x58" name="glyph00088"/>
|
||||||
|
<map code="0x59" name="glyph00089"/>
|
||||||
|
<map code="0x5a" name="glyph00090"/>
|
||||||
|
<map code="0x5b" name="glyph00091"/>
|
||||||
|
<map code="0x5c" name="glyph00092"/>
|
||||||
|
<map code="0x5d" name="glyph00093"/>
|
||||||
|
<map code="0x5e" name="glyph00094"/>
|
||||||
|
<map code="0x5f" name="glyph00095"/>
|
||||||
|
<map code="0x60" name="glyph00096"/>
|
||||||
|
<map code="0x61" name="glyph00097"/>
|
||||||
|
<map code="0x62" name="glyph00098"/>
|
||||||
|
<map code="0x63" name="glyph00099"/>
|
||||||
|
<map code="0x64" name="glyph00100"/>
|
||||||
|
<map code="0x65" name="glyph00101"/>
|
||||||
|
<map code="0x66" name="glyph00102"/>
|
||||||
|
<map code="0x67" name="glyph00103"/>
|
||||||
|
<map code="0x68" name="glyph00104"/>
|
||||||
|
<map code="0x69" name="glyph00105"/>
|
||||||
|
<map code="0x6a" name="glyph00106"/>
|
||||||
|
<map code="0x6b" name="glyph00107"/>
|
||||||
|
<map code="0x6c" name="glyph00108"/>
|
||||||
|
<map code="0x6d" name="glyph00109"/>
|
||||||
|
<map code="0x6e" name="glyph00110"/>
|
||||||
|
<map code="0x6f" name="glyph00111"/>
|
||||||
|
<map code="0x70" name="glyph00112"/>
|
||||||
|
<map code="0x71" name="glyph00113"/>
|
||||||
|
<map code="0x72" name="glyph00114"/>
|
||||||
|
<map code="0x73" name="glyph00115"/>
|
||||||
|
<map code="0x74" name="glyph00116"/>
|
||||||
|
</cmap_format_6>
|
||||||
|
</cmap>
|
||||||
|
|
||||||
|
<loca>
|
||||||
|
<!-- The 'loca' table will be calculated by the compiler -->
|
||||||
|
</loca>
|
||||||
|
|
||||||
|
<glyf>
|
||||||
|
<!-- The xMin, yMin, xMax and yMax values
|
||||||
|
will be recalculated by the compiler. -->
|
||||||
|
<TTGlyph name=".notdef" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
<contour>
|
||||||
|
</contour>
|
||||||
|
<instructions><assembly>
|
||||||
|
</assembly></instructions>
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00001" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00002" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00003" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00004" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00005" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00006" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00007" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00008" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00009" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00010" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00011" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00012" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00013" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00014" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00015" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00016" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00017" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00018" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00019" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00020" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00021" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00022" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00023" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00024" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00025" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00026" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00027" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00028" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00029" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00030" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00031" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00032" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00033" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00034" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00035" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00036" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00037" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00038" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00039" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00040" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00041" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00042" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00043" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00044" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00045" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00046" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00047" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00048" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00049" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00050" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00051" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00052" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00053" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00054" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00055" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00056" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00057" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00058" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00059" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00060" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00061" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00062" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00063" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00064" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00065" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00066" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00067" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00068" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00069" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00070" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00071" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00072" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00073" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00074" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00075" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00076" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00077" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00078" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00079" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00080" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00081" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00082" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00083" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00084" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00085" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00086" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00087" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00088" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00089" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00090" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00091" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00092" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00093" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00094" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00095" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00096" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00097" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00098" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00099" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00100" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00101" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00102" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00103" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00104" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00105" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00106" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00107" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00108" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00109" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00110" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00111" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00112" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00113" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00114" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00115" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
<TTGlyph name="glyph00116" xMin="0" yMin="0" xMax="1000" yMax="1000">
|
||||||
|
</TTGlyph>
|
||||||
|
</glyf>
|
||||||
|
</ttFont>
|
9
NGCC/Tess4J/test/log4j.properties
Normal file
9
NGCC/Tess4J/test/log4j.properties
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
# Set root logger level to DEBUG and its only appender to A1.
|
||||||
|
log4j.rootLogger=DEBUG, A1
|
||||||
|
|
||||||
|
# A1 is set to be a ConsoleAppender.
|
||||||
|
log4j.appender.A1=org.apache.log4j.ConsoleAppender
|
||||||
|
|
||||||
|
# A1 uses PatternLayout.
|
||||||
|
log4j.appender.A1.layout=org.apache.log4j.PatternLayout
|
||||||
|
log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
|
75
NGCC/Tess4J/test/net/sourceforge/tess4j/ProgressMonitor.java
Normal file
75
NGCC/Tess4J/test/net/sourceforge/tess4j/ProgressMonitor.java
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
/**
|
||||||
|
* Copyright @ 2014 Quan Nguyen
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||||
|
* use this file except in compliance with the License. You may obtain a copy of
|
||||||
|
* the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations under
|
||||||
|
* the License.
|
||||||
|
*/
|
||||||
|
package net.sourceforge.tess4j;
|
||||||
|
|
||||||
|
import com.sun.jna.Pointer;
|
||||||
|
import net.sourceforge.tess4j.util.LoggHelper;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import static net.sourceforge.tess4j.ITessAPI.TRUE;
|
||||||
|
|
||||||
|
class ProgressMonitor extends Thread {
|
||||||
|
|
||||||
|
ITessAPI.ETEXT_DESC monitor;
|
||||||
|
StringBuilder outputMessage = new StringBuilder();
|
||||||
|
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(new LoggHelper().toString());
|
||||||
|
|
||||||
|
public ProgressMonitor(ITessAPI.ETEXT_DESC monitor) {
|
||||||
|
this.monitor = monitor;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getMessage() {
|
||||||
|
return outputMessage.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void run() {
|
||||||
|
try {
|
||||||
|
while (true) {
|
||||||
|
logger.info("ocr alive: " + (monitor.ocr_alive == TRUE));
|
||||||
|
logger.info("progress: " + monitor.progress);
|
||||||
|
outputMessage.append(monitor.more_to_come);
|
||||||
|
if (monitor.progress >= 100) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
Thread.sleep(100);
|
||||||
|
}
|
||||||
|
} catch (Exception ioe) {
|
||||||
|
ioe.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Cancels OCR operation.
|
||||||
|
*/
|
||||||
|
public void cancel() {
|
||||||
|
monitor.cancel = new ITessAPI.CANCEL_FUNC() {
|
||||||
|
@Override
|
||||||
|
public boolean invoke(Pointer cancel_this, int words) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Resets cancel flag.
|
||||||
|
*/
|
||||||
|
public void reset() {
|
||||||
|
monitor.cancel = null;
|
||||||
|
}
|
||||||
|
}
|
645
NGCC/Tess4J/test/net/sourceforge/tess4j/TessAPI1Test.java
Normal file
645
NGCC/Tess4J/test/net/sourceforge/tess4j/TessAPI1Test.java
Normal file
@ -0,0 +1,645 @@
|
|||||||
|
/**
|
||||||
|
* Copyright @ 2012 Quan Nguyen
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||||
|
* use this file except in compliance with the License. You may obtain a copy of
|
||||||
|
* the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations under
|
||||||
|
* the License.
|
||||||
|
*/
|
||||||
|
package net.sourceforge.tess4j;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertArrayEquals;
|
||||||
|
|
||||||
|
import java.awt.image.BufferedImage;
|
||||||
|
import java.io.BufferedReader;
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
import java.io.FileReader;
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
import java.nio.FloatBuffer;
|
||||||
|
import java.nio.IntBuffer;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
import javax.imageio.ImageIO;
|
||||||
|
|
||||||
|
import net.sourceforge.tess4j.util.LoggHelper;
|
||||||
|
import net.sourceforge.tess4j.util.Utils;
|
||||||
|
import net.sourceforge.tess4j.util.ImageIOHelper;
|
||||||
|
|
||||||
|
import com.ochafik.lang.jnaerator.runtime.NativeSize;
|
||||||
|
import com.sun.jna.NativeLong;
|
||||||
|
|
||||||
|
import org.junit.After;
|
||||||
|
import org.junit.AfterClass;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import com.sun.jna.Pointer;
|
||||||
|
import com.sun.jna.StringArray;
|
||||||
|
import com.sun.jna.ptr.PointerByReference;
|
||||||
|
import net.sourceforge.lept4j.Box;
|
||||||
|
import net.sourceforge.lept4j.Boxa;
|
||||||
|
import static net.sourceforge.lept4j.ILeptonica.L_CLONE;
|
||||||
|
import net.sourceforge.lept4j.Leptonica;
|
||||||
|
import net.sourceforge.lept4j.Leptonica1;
|
||||||
|
import net.sourceforge.lept4j.Pix;
|
||||||
|
import net.sourceforge.lept4j.util.LeptUtils;
|
||||||
|
|
||||||
|
import net.sourceforge.tess4j.ITessAPI.*;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import static net.sourceforge.tess4j.ITessAPI.FALSE;
|
||||||
|
import static net.sourceforge.tess4j.ITessAPI.TRUE;
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
|
public class TessAPI1Test {
|
||||||
|
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(new LoggHelper().toString());
|
||||||
|
private final String datapath = ".";
|
||||||
|
private final String testResourcesDataPath = "test/resources/test-data";
|
||||||
|
String language = "eng";
|
||||||
|
String expOCRResult = "The (quick) [brown] {fox} jumps!\nOver the $43,456.78 <lazy> #90 dog";
|
||||||
|
|
||||||
|
TessBaseAPI handle;
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public static void setUpClass() throws Exception {
|
||||||
|
}
|
||||||
|
|
||||||
|
@AfterClass
|
||||||
|
public static void tearDownClass() throws Exception {
|
||||||
|
}
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void setUp() {
|
||||||
|
handle = TessAPI1.TessBaseAPICreate();
|
||||||
|
}
|
||||||
|
|
||||||
|
@After
|
||||||
|
public void tearDown() {
|
||||||
|
TessAPI1.TessBaseAPIDelete(handle);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of TessBaseAPIRect method, of class TessAPI1.
|
||||||
|
*
|
||||||
|
* @throws java.lang.Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testTessBaseAPIRect() throws Exception {
|
||||||
|
logger.info("TessBaseAPIRect");
|
||||||
|
String expResult = expOCRResult;
|
||||||
|
File tiff = new File(testResourcesDataPath, "eurotext.tif");
|
||||||
|
BufferedImage image = ImageIO.read(tiff); // require jai-imageio lib to read TIFF
|
||||||
|
ByteBuffer buf = ImageIOHelper.convertImageData(image);
|
||||||
|
int bpp = image.getColorModel().getPixelSize();
|
||||||
|
int bytespp = bpp / 8;
|
||||||
|
int bytespl = (int) Math.ceil(image.getWidth() * bpp / 8.0);
|
||||||
|
TessAPI1.TessBaseAPIInit3(handle, datapath, language);
|
||||||
|
TessAPI1.TessBaseAPISetPageSegMode(handle, TessPageSegMode.PSM_AUTO);
|
||||||
|
Pointer utf8Text = TessAPI1.TessBaseAPIRect(handle, buf, bytespp, bytespl, 0, 0, image.getWidth(), image.getHeight());
|
||||||
|
String result = utf8Text.getString(0);
|
||||||
|
TessAPI1.TessDeleteText(utf8Text);
|
||||||
|
logger.info(result);
|
||||||
|
assertEquals(expResult, result.substring(0, expResult.length()));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of TessBaseAPIGetUTF8Text method, of class TessAPI1.
|
||||||
|
*
|
||||||
|
* @throws java.lang.Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testTessBaseAPIGetUTF8Text() throws Exception {
|
||||||
|
logger.info("TessBaseAPIGetUTF8Text");
|
||||||
|
String expResult = expOCRResult;
|
||||||
|
File tiff = new File(testResourcesDataPath, "eurotext.tif");
|
||||||
|
BufferedImage image = ImageIO.read(new FileInputStream(tiff)); // require jai-imageio lib to read TIFF
|
||||||
|
ByteBuffer buf = ImageIOHelper.convertImageData(image);
|
||||||
|
int bpp = image.getColorModel().getPixelSize();
|
||||||
|
int bytespp = bpp / 8;
|
||||||
|
int bytespl = (int) Math.ceil(image.getWidth() * bpp / 8.0);
|
||||||
|
TessAPI1.TessBaseAPIInit3(handle, datapath, language);
|
||||||
|
TessAPI1.TessBaseAPISetPageSegMode(handle, TessPageSegMode.PSM_AUTO);
|
||||||
|
TessAPI1.TessBaseAPISetImage(handle, buf, image.getWidth(), image.getHeight(), bytespp, bytespl);
|
||||||
|
TessAPI1.TessBaseAPISetRectangle(handle, 0, 0, 1024, 800);
|
||||||
|
Pointer utf8Text = TessAPI1.TessBaseAPIGetUTF8Text(handle);
|
||||||
|
String result = utf8Text.getString(0);
|
||||||
|
TessAPI1.TessDeleteText(utf8Text);
|
||||||
|
logger.info(result);
|
||||||
|
assertEquals(expResult, result.substring(0, expResult.length()));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of TessBaseAPIGetUTF8Text method, of class TessAPI1.
|
||||||
|
*
|
||||||
|
* @throws java.lang.Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testTessBaseAPIGetUTF8Text_Pix() throws Exception {
|
||||||
|
logger.info("TessBaseAPIGetUTF8Text_Pix");
|
||||||
|
String expResult = expOCRResult;
|
||||||
|
File tiff = new File(testResourcesDataPath, "eurotext.tif");
|
||||||
|
Leptonica leptInstance = Leptonica.INSTANCE;
|
||||||
|
Pix pix = leptInstance.pixRead(tiff.getPath());
|
||||||
|
TessAPI1.TessBaseAPIInit3(handle, datapath, language);
|
||||||
|
TessAPI1.TessBaseAPISetImage2(handle, pix);
|
||||||
|
Pointer utf8Text = TessAPI1.TessBaseAPIGetUTF8Text(handle);
|
||||||
|
String result = utf8Text.getString(0);
|
||||||
|
TessAPI1.TessDeleteText(utf8Text);
|
||||||
|
logger.info(result);
|
||||||
|
|
||||||
|
//release Pix resource
|
||||||
|
PointerByReference pRef = new PointerByReference();
|
||||||
|
pRef.setValue(pix.getPointer());
|
||||||
|
leptInstance.pixDestroy(pRef);
|
||||||
|
|
||||||
|
assertEquals(expResult, result.substring(0, expResult.length()));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of TessBaseAPIGetComponentImages method, of class TessAPI1.
|
||||||
|
*
|
||||||
|
* @throws java.lang.Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testTessBaseAPIGetComponentImages() throws Exception {
|
||||||
|
logger.info("TessBaseAPIGetComponentImages");
|
||||||
|
File image = new File(testResourcesDataPath, "eurotext.png");
|
||||||
|
int expResult = 12; // number of lines in the test image
|
||||||
|
Pix pix = Leptonica1.pixRead(image.getPath());
|
||||||
|
TessAPI1.TessBaseAPIInit3(handle, datapath, language);
|
||||||
|
TessAPI1.TessBaseAPISetImage2(handle, pix);
|
||||||
|
PointerByReference pixa = null;
|
||||||
|
PointerByReference blockids = null;
|
||||||
|
Boxa boxes = TessAPI1.TessBaseAPIGetComponentImages(handle, TessPageIteratorLevel.RIL_TEXTLINE, TRUE, pixa, blockids);
|
||||||
|
// boxes = TessAPI1.TessBaseAPIGetRegions(handle, pixa); // equivalent to TessPageIteratorLevel.RIL_BLOCK
|
||||||
|
int boxCount = Leptonica1.boxaGetCount(boxes);
|
||||||
|
for (int i = 0; i < boxCount; i++) {
|
||||||
|
Box box = Leptonica1.boxaGetBox(boxes, i, L_CLONE);
|
||||||
|
if (box == null) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
TessAPI1.TessBaseAPISetRectangle(handle, box.x, box.y, box.w, box.h);
|
||||||
|
Pointer utf8Text = TessAPI1.TessBaseAPIGetUTF8Text(handle);
|
||||||
|
String ocrResult = utf8Text.getString(0);
|
||||||
|
TessAPI1.TessDeleteText(utf8Text);
|
||||||
|
int conf = TessAPI1.TessBaseAPIMeanTextConf(handle);
|
||||||
|
System.out.print(String.format("Box[%d]: x=%d, y=%d, w=%d, h=%d, confidence: %d, text: %s", i, box.x, box.y, box.w, box.h, conf, ocrResult));
|
||||||
|
LeptUtils.dispose(box);
|
||||||
|
}
|
||||||
|
|
||||||
|
// release Pix and Boxa resources
|
||||||
|
LeptUtils.dispose(pix);
|
||||||
|
LeptUtils.dispose(boxes);
|
||||||
|
|
||||||
|
assertEquals(expResult, boxCount);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of TessVersion method, of class TessAPI1.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testTessVersion() {
|
||||||
|
logger.info("TessVersion");
|
||||||
|
String expResult = "3.05.01";
|
||||||
|
String result = TessAPI1.TessVersion();
|
||||||
|
logger.info(result);
|
||||||
|
assertTrue(result.startsWith(expResult));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of TessBaseAPIGetBoolVariable method, of class TessAPI1.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testTessBaseAPIGetBoolVariable() {
|
||||||
|
logger.info("TessBaseAPIGetBoolVariable");
|
||||||
|
String name = "tessedit_create_hocr";
|
||||||
|
TessAPI1.TessBaseAPISetVariable(handle, name, "1");
|
||||||
|
IntBuffer value = IntBuffer.allocate(1);
|
||||||
|
int result = -1;
|
||||||
|
if (TessAPI1.TessBaseAPIGetBoolVariable(handle, "tessedit_create_hocr", value) == TRUE) {
|
||||||
|
result = value.get(0);
|
||||||
|
}
|
||||||
|
int expResult = 1;
|
||||||
|
assertEquals(expResult, result);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of TessBaseAPIPrintVariables method, of class TessAPI1.
|
||||||
|
*
|
||||||
|
* @throws java.lang.Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testTessBaseAPIPrintVariablesToFile() throws Exception {
|
||||||
|
logger.info("TessBaseAPIPrintVariablesToFile");
|
||||||
|
String var = "tessedit_char_whitelist";
|
||||||
|
String value = "0123456789";
|
||||||
|
TessAPI1.TessBaseAPISetVariable(handle, var, value);
|
||||||
|
String filename = "printvar.txt";
|
||||||
|
TessAPI1.TessBaseAPIPrintVariablesToFile(handle, filename); // will crash if not invoked after some method
|
||||||
|
File file = new File(filename);
|
||||||
|
BufferedReader input = new BufferedReader(new FileReader(file));
|
||||||
|
StringBuilder strB = new StringBuilder();
|
||||||
|
String line;
|
||||||
|
String EOL = System.getProperty("line.separator");
|
||||||
|
while ((line = input.readLine()) != null) {
|
||||||
|
strB.append(line).append(EOL);
|
||||||
|
}
|
||||||
|
input.close();
|
||||||
|
file.delete();
|
||||||
|
assertTrue(strB.toString().contains(var + "\t" + value));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of TessBaseAPIInit4 method, of class TessAPI1.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testTessBaseAPIInit4() {
|
||||||
|
logger.info("TessBaseAPIInit4");
|
||||||
|
int oem = TessOcrEngineMode.OEM_DEFAULT;
|
||||||
|
PointerByReference configs = null;
|
||||||
|
int configs_size = 0;
|
||||||
|
|
||||||
|
// disable loading dictionaries
|
||||||
|
String[] args = new String[]{"load_system_dawg", "load_freq_dawg"};
|
||||||
|
StringArray sarray = new StringArray(args);
|
||||||
|
PointerByReference vars_vec = new PointerByReference();
|
||||||
|
vars_vec.setPointer(sarray);
|
||||||
|
|
||||||
|
args = new String[]{"F", "F"};
|
||||||
|
sarray = new StringArray(args);
|
||||||
|
PointerByReference vars_values = new PointerByReference();
|
||||||
|
vars_values.setPointer(sarray);
|
||||||
|
|
||||||
|
NativeSize vars_vec_size = new NativeSize(args.length);
|
||||||
|
|
||||||
|
int expResult = 0;
|
||||||
|
int result = TessAPI1.TessBaseAPIInit4(handle, datapath, language, oem, configs, configs_size, vars_vec, vars_values, vars_vec_size, FALSE);
|
||||||
|
assertEquals(expResult, result);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of TessBaseAPIGetInitLanguagesAsString method, of class TessAPI1.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testTessBaseAPIGetInitLanguagesAsString() {
|
||||||
|
logger.info("TessBaseAPIGetInitLanguagesAsString");
|
||||||
|
String expResult = "";
|
||||||
|
String result = TessAPI1.TessBaseAPIGetInitLanguagesAsString(handle);
|
||||||
|
assertEquals(expResult, result);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of TessBaseAPIGetLoadedLanguagesAsVector method, of class TessAPI1.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testTessBaseAPIGetLoadedLanguagesAsVector() {
|
||||||
|
logger.info("TessBaseAPIGetLoadedLanguagesAsVector");
|
||||||
|
TessAPI1.TessBaseAPIInit3(handle, datapath, language);
|
||||||
|
String[] expResult = {"eng"};
|
||||||
|
String[] result = TessAPI1.TessBaseAPIGetLoadedLanguagesAsVector(handle).getPointer().getStringArray(0);
|
||||||
|
assertArrayEquals(expResult, result);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of TessBaseAPIGetAvailableLanguagesAsVector method, of class
|
||||||
|
* TessAPI1.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testTessBaseAPIGetAvailableLanguagesAsVector() {
|
||||||
|
logger.info("TessBaseAPIGetAvailableLanguagesAsVector");
|
||||||
|
TessAPI1.TessBaseAPIInit3(handle, datapath, language);
|
||||||
|
String[] expResult = {"eng"};
|
||||||
|
String[] result = TessAPI1.TessBaseAPIGetAvailableLanguagesAsVector(handle).getPointer().getStringArray(0);
|
||||||
|
assertTrue(Arrays.asList(result).containsAll(Arrays.asList(expResult)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of TessBaseAPIGetHOCRText method, of class TessAPI1.
|
||||||
|
*
|
||||||
|
* @throws java.lang.Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testTessBaseAPIGetHOCRText() throws Exception {
|
||||||
|
logger.info("TessBaseAPIGetHOCRText");
|
||||||
|
File tiff = new File(testResourcesDataPath, "eurotext.tif");
|
||||||
|
BufferedImage image = ImageIO.read(new FileInputStream(tiff)); // require jai-imageio lib to read TIFF
|
||||||
|
ByteBuffer buf = ImageIOHelper.convertImageData(image);
|
||||||
|
int bpp = image.getColorModel().getPixelSize();
|
||||||
|
int bytespp = bpp / 8;
|
||||||
|
int bytespl = (int) Math.ceil(image.getWidth() * bpp / 8.0);
|
||||||
|
TessAPI1.TessBaseAPISetPageSegMode(handle, TessPageSegMode.PSM_AUTO);
|
||||||
|
TessAPI1.TessBaseAPIInit3(handle, datapath, language);
|
||||||
|
TessAPI1.TessBaseAPISetImage(handle, buf, image.getWidth(), image.getHeight(), bytespp, bytespl);
|
||||||
|
int page_number = 0;
|
||||||
|
Pointer utf8Text = TessAPI1.TessBaseAPIGetHOCRText(handle, page_number);
|
||||||
|
String result = utf8Text.getString(0);
|
||||||
|
TessAPI1.TessDeleteText(utf8Text);
|
||||||
|
assertTrue(result.contains("<div class='ocr_page'"));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of TessBaseAPIAnalyseLayout method, of class TessAPI1.
|
||||||
|
*
|
||||||
|
* @throws java.lang.Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testTessBaseAPIAnalyseLayout() throws Exception {
|
||||||
|
logger.info("TessBaseAPIAnalyseLayout");
|
||||||
|
File image = new File(testResourcesDataPath, "eurotext.png");
|
||||||
|
int expResult = 12; // number of lines in the test image
|
||||||
|
Leptonica leptInstance = Leptonica.INSTANCE;
|
||||||
|
Pix pix = leptInstance.pixRead(image.getPath());
|
||||||
|
TessAPI1.TessBaseAPIInit3(handle, datapath, language);
|
||||||
|
TessAPI1.TessBaseAPISetImage2(handle, pix);
|
||||||
|
int pageIteratorLevel = TessPageIteratorLevel.RIL_TEXTLINE;
|
||||||
|
logger.info("PageIteratorLevel: " + Utils.getConstantName(pageIteratorLevel, TessPageIteratorLevel.class));
|
||||||
|
int i = 0;
|
||||||
|
TessPageIterator pi = TessAPI1.TessBaseAPIAnalyseLayout(handle);
|
||||||
|
|
||||||
|
do {
|
||||||
|
IntBuffer leftB = IntBuffer.allocate(1);
|
||||||
|
IntBuffer topB = IntBuffer.allocate(1);
|
||||||
|
IntBuffer rightB = IntBuffer.allocate(1);
|
||||||
|
IntBuffer bottomB = IntBuffer.allocate(1);
|
||||||
|
TessAPI1.TessPageIteratorBoundingBox(pi, pageIteratorLevel, leftB, topB, rightB, bottomB);
|
||||||
|
int left = leftB.get();
|
||||||
|
int top = topB.get();
|
||||||
|
int right = rightB.get();
|
||||||
|
int bottom = bottomB.get();
|
||||||
|
logger.info(String.format("Box[%d]: x=%d, y=%d, w=%d, h=%d", i++, left, top, right - left, bottom - top));
|
||||||
|
} while (TessAPI1.TessPageIteratorNext(pi, pageIteratorLevel) == TRUE);
|
||||||
|
TessAPI1.TessPageIteratorDelete(pi);
|
||||||
|
assertEquals(expResult, i);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of TessBaseAPIDetectOrientationScript method, of class TessAPI1.
|
||||||
|
*
|
||||||
|
* @throws java.lang.Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testTessBaseAPIDetectOrientationScript() throws Exception {
|
||||||
|
logger.info("TessBaseAPIDetectOrientationScript");
|
||||||
|
File image = new File(testResourcesDataPath, "eurotext.png");
|
||||||
|
int expResult = TRUE;
|
||||||
|
Pix pix = Leptonica1.pixRead(image.getPath());
|
||||||
|
TessAPI1.TessBaseAPIInit3(handle, datapath, language);
|
||||||
|
TessAPI1.TessBaseAPISetImage2(handle, pix);
|
||||||
|
|
||||||
|
IntBuffer orient_degB = IntBuffer.allocate(1);
|
||||||
|
FloatBuffer orient_confB = FloatBuffer.allocate(1);
|
||||||
|
PointerByReference script_nameB = new PointerByReference();
|
||||||
|
FloatBuffer script_confB = FloatBuffer.allocate(1);
|
||||||
|
|
||||||
|
int result = TessAPI1.TessBaseAPIDetectOrientationScript(handle, orient_degB, orient_confB, script_nameB, script_confB);
|
||||||
|
if (result == TRUE) {
|
||||||
|
int orient_deg = orient_degB.get();
|
||||||
|
float orient_conf = orient_confB.get();
|
||||||
|
String script_name = script_nameB.getValue().getString(0);
|
||||||
|
float script_conf = script_confB.get();
|
||||||
|
logger.info(String.format("OrientationScript: orient_deg=%d, orient_conf=%f, script_name=%s, script_conf=%f", orient_deg, orient_conf, script_name, script_conf));
|
||||||
|
}
|
||||||
|
|
||||||
|
PointerByReference pRef = new PointerByReference();
|
||||||
|
pRef.setValue(pix.getPointer());
|
||||||
|
Leptonica1.pixDestroy(pRef);
|
||||||
|
assertEquals(expResult, result);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of Orientation and script detection (OSD).
|
||||||
|
*
|
||||||
|
* @throws java.lang.Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testOSD() throws Exception {
|
||||||
|
logger.info("OSD");
|
||||||
|
int expResult = TessPageSegMode.PSM_AUTO_OSD;
|
||||||
|
IntBuffer orientation = IntBuffer.allocate(1);
|
||||||
|
IntBuffer direction = IntBuffer.allocate(1);
|
||||||
|
IntBuffer order = IntBuffer.allocate(1);
|
||||||
|
FloatBuffer deskew_angle = FloatBuffer.allocate(1);
|
||||||
|
File tiff = new File(testResourcesDataPath, "eurotext.tif");
|
||||||
|
BufferedImage image = ImageIO.read(new FileInputStream(tiff)); // require jai-imageio lib to read TIFF
|
||||||
|
ByteBuffer buf = ImageIOHelper.convertImageData(image);
|
||||||
|
int bpp = image.getColorModel().getPixelSize();
|
||||||
|
int bytespp = bpp / 8;
|
||||||
|
int bytespl = (int) Math.ceil(image.getWidth() * bpp / 8.0);
|
||||||
|
TessAPI1.TessBaseAPIInit3(handle, datapath, language);
|
||||||
|
TessAPI1.TessBaseAPISetPageSegMode(handle, expResult);
|
||||||
|
int actualResult = TessAPI1.TessBaseAPIGetPageSegMode(handle);
|
||||||
|
logger.info("PSM: " + Utils.getConstantName(actualResult, TessPageSegMode.class));
|
||||||
|
TessAPI1.TessBaseAPISetImage(handle, buf, image.getWidth(), image.getHeight(), bytespp, bytespl);
|
||||||
|
int success = TessAPI1.TessBaseAPIRecognize(handle, null);
|
||||||
|
if (success == 0) {
|
||||||
|
TessAPI1.TessPageIterator pi = TessAPI1.TessBaseAPIAnalyseLayout(handle);
|
||||||
|
TessAPI1.TessPageIteratorOrientation(pi, orientation, direction, order, deskew_angle);
|
||||||
|
logger.info(String.format(
|
||||||
|
"Orientation: %s\nWritingDirection: %s\nTextlineOrder: %s\nDeskew angle: %.4f\n",
|
||||||
|
Utils.getConstantName(orientation.get(), TessOrientation.class),
|
||||||
|
Utils.getConstantName(direction.get(), TessWritingDirection.class),
|
||||||
|
Utils.getConstantName(order.get(), TessTextlineOrder.class),
|
||||||
|
deskew_angle.get()));
|
||||||
|
}
|
||||||
|
assertEquals(expResult, actualResult);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of ResultIterator and PageIterator.
|
||||||
|
*
|
||||||
|
* @throws Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testResultIterator() throws Exception {
|
||||||
|
logger.info("TessBaseAPIGetIterator");
|
||||||
|
File tiff = new File(testResourcesDataPath, "eurotext.tif");
|
||||||
|
BufferedImage image = ImageIO.read(new FileInputStream(tiff)); // require jai-imageio lib to read TIFF
|
||||||
|
ByteBuffer buf = ImageIOHelper.convertImageData(image);
|
||||||
|
int bpp = image.getColorModel().getPixelSize();
|
||||||
|
int bytespp = bpp / 8;
|
||||||
|
int bytespl = (int) Math.ceil(image.getWidth() * bpp / 8.0);
|
||||||
|
TessAPI1.TessBaseAPIInit3(handle, datapath, language);
|
||||||
|
TessAPI1.TessBaseAPISetPageSegMode(handle, TessPageSegMode.PSM_AUTO);
|
||||||
|
TessAPI1.TessBaseAPISetImage(handle, buf, image.getWidth(), image.getHeight(), bytespp, bytespl);
|
||||||
|
ETEXT_DESC monitor = new ETEXT_DESC();
|
||||||
|
ITessAPI.TimeVal timeout = new ITessAPI.TimeVal();
|
||||||
|
timeout.tv_sec = new NativeLong(0L); // time > 0 causes blank ouput
|
||||||
|
monitor.end_time = timeout;
|
||||||
|
ProgressMonitor pmo = new ProgressMonitor(monitor);
|
||||||
|
pmo.start();
|
||||||
|
TessAPI1.TessBaseAPIRecognize(handle, monitor);
|
||||||
|
logger.info("Message: " + pmo.getMessage());
|
||||||
|
TessResultIterator ri = TessAPI1.TessBaseAPIGetIterator(handle);
|
||||||
|
TessPageIterator pi = TessAPI1.TessResultIteratorGetPageIterator(ri);
|
||||||
|
TessAPI1.TessPageIteratorBegin(pi);
|
||||||
|
logger.info("Bounding boxes:\nchar(s) left top right bottom confidence font-attributes");
|
||||||
|
int level = TessPageIteratorLevel.RIL_WORD;
|
||||||
|
|
||||||
|
// int height = image.getHeight();
|
||||||
|
do {
|
||||||
|
Pointer ptr = TessAPI1.TessResultIteratorGetUTF8Text(ri, level);
|
||||||
|
String word = ptr.getString(0);
|
||||||
|
TessAPI1.TessDeleteText(ptr);
|
||||||
|
float confidence = TessAPI1.TessResultIteratorConfidence(ri, level);
|
||||||
|
IntBuffer leftB = IntBuffer.allocate(1);
|
||||||
|
IntBuffer topB = IntBuffer.allocate(1);
|
||||||
|
IntBuffer rightB = IntBuffer.allocate(1);
|
||||||
|
IntBuffer bottomB = IntBuffer.allocate(1);
|
||||||
|
TessAPI1.TessPageIteratorBoundingBox(pi, level, leftB, topB, rightB, bottomB);
|
||||||
|
int left = leftB.get();
|
||||||
|
int top = topB.get();
|
||||||
|
int right = rightB.get();
|
||||||
|
int bottom = bottomB.get();
|
||||||
|
System.out.print(String.format("%s %d %d %d %d %f", word, left, top, right, bottom, confidence));
|
||||||
|
// logger.info(String.format("%s %d %d %d %d", str, left, height - bottom, right, height - top)); //
|
||||||
|
// training box coordinates
|
||||||
|
|
||||||
|
IntBuffer boldB = IntBuffer.allocate(1);
|
||||||
|
IntBuffer italicB = IntBuffer.allocate(1);
|
||||||
|
IntBuffer underlinedB = IntBuffer.allocate(1);
|
||||||
|
IntBuffer monospaceB = IntBuffer.allocate(1);
|
||||||
|
IntBuffer serifB = IntBuffer.allocate(1);
|
||||||
|
IntBuffer smallcapsB = IntBuffer.allocate(1);
|
||||||
|
IntBuffer pointSizeB = IntBuffer.allocate(1);
|
||||||
|
IntBuffer fontIdB = IntBuffer.allocate(1);
|
||||||
|
String fontName = TessAPI1.TessResultIteratorWordFontAttributes(ri, boldB, italicB, underlinedB,
|
||||||
|
monospaceB, serifB, smallcapsB, pointSizeB, fontIdB);
|
||||||
|
boolean bold = boldB.get() == TRUE;
|
||||||
|
boolean italic = italicB.get() == TRUE;
|
||||||
|
boolean underlined = underlinedB.get() == TRUE;
|
||||||
|
boolean monospace = monospaceB.get() == TRUE;
|
||||||
|
boolean serif = serifB.get() == TRUE;
|
||||||
|
boolean smallcaps = smallcapsB.get() == TRUE;
|
||||||
|
int pointSize = pointSizeB.get();
|
||||||
|
int fontId = fontIdB.get();
|
||||||
|
logger.info(String.format(" font: %s, size: %d, font id: %d, bold: %b,"
|
||||||
|
+ " italic: %b, underlined: %b, monospace: %b, serif: %b, smallcap: %b", fontName, pointSize,
|
||||||
|
fontId, bold, italic, underlined, monospace, serif, smallcaps));
|
||||||
|
} while (TessAPI1.TessPageIteratorNext(pi, level) == TRUE);
|
||||||
|
|
||||||
|
assertTrue(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of ChoiceIterator.
|
||||||
|
*
|
||||||
|
* @throws Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testChoiceIterator() throws Exception {
|
||||||
|
logger.info("TessResultIteratorGetChoiceIterator");
|
||||||
|
File tiff = new File(testResourcesDataPath, "eurotext.tif");
|
||||||
|
BufferedImage image = ImageIO.read(new FileInputStream(tiff)); // require jai-imageio lib to read TIFF
|
||||||
|
ByteBuffer buf = ImageIOHelper.convertImageData(image);
|
||||||
|
int bpp = image.getColorModel().getPixelSize();
|
||||||
|
int bytespp = bpp / 8;
|
||||||
|
int bytespl = (int) Math.ceil(image.getWidth() * bpp / 8.0);
|
||||||
|
TessAPI1.TessBaseAPIInit3(handle, datapath, language);
|
||||||
|
TessAPI1.TessBaseAPISetImage(handle, buf, image.getWidth(), image.getHeight(), bytespp, bytespl);
|
||||||
|
TessAPI1.TessBaseAPISetVariable(handle, "save_blob_choices", "T");
|
||||||
|
TessAPI1.TessBaseAPISetRectangle(handle, 37, 228, 548, 31);
|
||||||
|
ETEXT_DESC monitor = new ETEXT_DESC();
|
||||||
|
ProgressMonitor pmo = new ProgressMonitor(monitor);
|
||||||
|
pmo.start();
|
||||||
|
TessAPI1.TessBaseAPIRecognize(handle, monitor);
|
||||||
|
logger.info("Message: " + pmo.getMessage());
|
||||||
|
TessResultIterator ri = TessAPI1.TessBaseAPIGetIterator(handle);
|
||||||
|
int level = TessPageIteratorLevel.RIL_SYMBOL;
|
||||||
|
|
||||||
|
if (ri != null) {
|
||||||
|
do {
|
||||||
|
Pointer symbol = TessAPI1.TessResultIteratorGetUTF8Text(ri, level);
|
||||||
|
float conf = TessAPI1.TessResultIteratorConfidence(ri, level);
|
||||||
|
if (symbol != null) {
|
||||||
|
logger.info(String.format("symbol %s, conf: %f", symbol.getString(0), conf));
|
||||||
|
boolean indent = false;
|
||||||
|
TessChoiceIterator ci = TessAPI1.TessResultIteratorGetChoiceIterator(ri);
|
||||||
|
do {
|
||||||
|
if (indent) {
|
||||||
|
System.out.print("\t");
|
||||||
|
}
|
||||||
|
System.out.print("\t- ");
|
||||||
|
String choice = TessAPI1.TessChoiceIteratorGetUTF8Text(ci);
|
||||||
|
logger.info(String.format("%s conf: %f", choice, TessAPI1.TessChoiceIteratorConfidence(ci)));
|
||||||
|
indent = true;
|
||||||
|
} while (TessAPI1.TessChoiceIteratorNext(ci) == ITessAPI.TRUE);
|
||||||
|
TessAPI1.TessChoiceIteratorDelete(ci);
|
||||||
|
}
|
||||||
|
logger.info("---------------------------------------------");
|
||||||
|
TessAPI1.TessDeleteText(symbol);
|
||||||
|
} while (TessAPI1.TessResultIteratorNext(ri, level) == ITessAPI.TRUE);
|
||||||
|
}
|
||||||
|
|
||||||
|
assertTrue(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of ResultRenderer method, of class TessAPI1.
|
||||||
|
*
|
||||||
|
* @throws java.lang.Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testResultRenderer() throws Exception {
|
||||||
|
logger.info("TessResultRenderer");
|
||||||
|
String image = String.format("%s/%s", testResourcesDataPath, "eurotext.tif");
|
||||||
|
String output = "capi-test.txt";
|
||||||
|
int set_only_init_params = ITessAPI.FALSE;
|
||||||
|
int oem = TessOcrEngineMode.OEM_DEFAULT;
|
||||||
|
PointerByReference configs = null;
|
||||||
|
int configs_size = 0;
|
||||||
|
|
||||||
|
String[] params = {"load_system_dawg", "tessedit_char_whitelist"};
|
||||||
|
String vals[] = {"F", ""}; //0123456789-.IThisalotfpnex
|
||||||
|
PointerByReference vars_vec = new PointerByReference();
|
||||||
|
vars_vec.setPointer(new StringArray(params));
|
||||||
|
PointerByReference vars_values = new PointerByReference();
|
||||||
|
vars_values.setPointer(new StringArray(vals));
|
||||||
|
NativeSize vars_vec_size = new NativeSize(params.length);
|
||||||
|
|
||||||
|
TessAPI1.TessBaseAPISetOutputName(handle, output);
|
||||||
|
|
||||||
|
int rc = TessAPI1.TessBaseAPIInit4(handle, datapath, language,
|
||||||
|
oem, configs, configs_size, vars_vec, vars_values, vars_vec_size, set_only_init_params);
|
||||||
|
|
||||||
|
if (rc != 0) {
|
||||||
|
TessAPI1.TessBaseAPIDelete(handle);
|
||||||
|
logger.error("Could not initialize tesseract.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
String outputbase = "test/test-results/outputbase1";
|
||||||
|
TessResultRenderer renderer = TessAPI1.TessHOcrRendererCreate(outputbase);
|
||||||
|
TessAPI1.TessResultRendererInsert(renderer, TessAPI1.TessBoxTextRendererCreate(outputbase));
|
||||||
|
TessAPI1.TessResultRendererInsert(renderer, TessAPI1.TessTextRendererCreate(outputbase));
|
||||||
|
String dataPath = TessAPI1.TessBaseAPIGetDatapath(handle);
|
||||||
|
TessAPI1.TessResultRendererInsert(renderer, TessAPI1.TessPDFRendererCreate(outputbase, dataPath));
|
||||||
|
int result = TessAPI1.TessBaseAPIProcessPages(handle, image, null, 0, renderer);
|
||||||
|
|
||||||
|
// if (result == FALSE) {
|
||||||
|
// logger.error("Error during processing.");
|
||||||
|
// return;
|
||||||
|
// }
|
||||||
|
for (; renderer != null; renderer = TessAPI1.TessResultRendererNext(renderer)) {
|
||||||
|
String ext = TessAPI1.TessResultRendererExtention(renderer).getString(0);
|
||||||
|
logger.info(String.format("TessResultRendererExtention: %s\nTessResultRendererTitle: %s\nTessResultRendererImageNum: %d",
|
||||||
|
ext,
|
||||||
|
TessAPI1.TessResultRendererTitle(renderer).getString(0),
|
||||||
|
TessAPI1.TessResultRendererImageNum(renderer)));
|
||||||
|
}
|
||||||
|
|
||||||
|
TessAPI1.TessDeleteResultRenderer(renderer);
|
||||||
|
assertTrue(new File(outputbase + ".pdf").exists());
|
||||||
|
}
|
||||||
|
}
|
625
NGCC/Tess4J/test/net/sourceforge/tess4j/TessAPIImpl.java
Normal file
625
NGCC/Tess4J/test/net/sourceforge/tess4j/TessAPIImpl.java
Normal file
@ -0,0 +1,625 @@
|
|||||||
|
/*
|
||||||
|
* Copyright @ 2017 Quan Nguyen
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package net.sourceforge.tess4j;
|
||||||
|
|
||||||
|
import com.ochafik.lang.jnaerator.runtime.NativeSize;
|
||||||
|
import com.sun.jna.Pointer;
|
||||||
|
import com.sun.jna.ptr.IntByReference;
|
||||||
|
import com.sun.jna.ptr.PointerByReference;
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
import java.nio.DoubleBuffer;
|
||||||
|
import java.nio.FloatBuffer;
|
||||||
|
import java.nio.IntBuffer;
|
||||||
|
import net.sourceforge.lept4j.Boxa;
|
||||||
|
import net.sourceforge.lept4j.Pix;
|
||||||
|
|
||||||
|
public class TessAPIImpl implements TessAPI {
|
||||||
|
|
||||||
|
public TessAPI getInstance() {
|
||||||
|
return TessAPI.INSTANCE;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void TessAPIEndPage() {
|
||||||
|
}
|
||||||
|
|
||||||
|
public void TessAPIRelease() {
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String TessVersion() {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void TessDeleteText(Pointer text) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void TessDeleteTextArray(PointerByReference arr) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void TessDeleteIntArray(IntBuffer arr) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ITessAPI.TessResultRenderer TessTextRendererCreate(String outputbase) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ITessAPI.TessResultRenderer TessHOcrRendererCreate(String outputbase) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public TessResultRenderer TessHOcrRendererCreate2(String outputbase, int font_info) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ITessAPI.TessResultRenderer TessPDFRendererCreate(String outputbase, String datadir) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public TessResultRenderer TessPDFRendererCreateTextonly(String outputbase, String datadir, int textonly) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ITessAPI.TessResultRenderer TessUnlvRendererCreate(String outputbase) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ITessAPI.TessResultRenderer TessBoxTextRendererCreate(String outputbase) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void TessDeleteResultRenderer(ITessAPI.TessResultRenderer renderer) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void TessResultRendererInsert(ITessAPI.TessResultRenderer renderer, ITessAPI.TessResultRenderer next) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ITessAPI.TessResultRenderer TessResultRendererNext(ITessAPI.TessResultRenderer renderer) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int TessResultRendererBeginDocument(ITessAPI.TessResultRenderer renderer, String title) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int TessResultRendererAddImage(ITessAPI.TessResultRenderer renderer, PointerByReference api) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int TessResultRendererEndDocument(ITessAPI.TessResultRenderer renderer) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Pointer TessResultRendererExtention(ITessAPI.TessResultRenderer renderer) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Pointer TessResultRendererTitle(ITessAPI.TessResultRenderer renderer) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int TessResultRendererImageNum(ITessAPI.TessResultRenderer renderer) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ITessAPI.TessBaseAPI TessBaseAPICreate() {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void TessBaseAPIDelete(ITessAPI.TessBaseAPI handle) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void TessBaseAPISetInputName(ITessAPI.TessBaseAPI handle, String name) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String TessBaseAPIGetInputName(ITessAPI.TessBaseAPI handle) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void TessBaseAPISetInputImage(ITessAPI.TessBaseAPI handle, Pix pix) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Pix TessBaseAPIGetInputImage(ITessAPI.TessBaseAPI handle) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int TessBaseAPIGetSourceYResolution(ITessAPI.TessBaseAPI handle) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String TessBaseAPIGetDatapath(ITessAPI.TessBaseAPI handle) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void TessBaseAPISetOutputName(ITessAPI.TessBaseAPI handle, String name) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int TessBaseAPISetVariable(ITessAPI.TessBaseAPI handle, String name, String value) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int TessBaseAPIGetIntVariable(ITessAPI.TessBaseAPI handle, String name, IntBuffer value) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int TessBaseAPIGetBoolVariable(ITessAPI.TessBaseAPI handle, String name, IntBuffer value) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int TessBaseAPIGetDoubleVariable(ITessAPI.TessBaseAPI handle, String name, DoubleBuffer value) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String TessBaseAPIGetStringVariable(ITessAPI.TessBaseAPI handle, String name) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void TessBaseAPIPrintVariablesToFile(ITessAPI.TessBaseAPI handle, String filename) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int TessBaseAPIInit1(ITessAPI.TessBaseAPI handle, String datapath, String language, int oem, PointerByReference configs, int configs_size) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int TessBaseAPIInit2(ITessAPI.TessBaseAPI handle, String datapath, String language, int oem) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int TessBaseAPIInit3(ITessAPI.TessBaseAPI handle, String datapath, String language) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int TessBaseAPIInit4(ITessAPI.TessBaseAPI handle, String datapath, String language, int oem, PointerByReference configs, int configs_size, PointerByReference vars_vec, PointerByReference vars_values, NativeSize vars_vec_size, int set_only_non_debug_params) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String TessBaseAPIGetInitLanguagesAsString(ITessAPI.TessBaseAPI handle) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public PointerByReference TessBaseAPIGetLoadedLanguagesAsVector(ITessAPI.TessBaseAPI handle) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public PointerByReference TessBaseAPIGetAvailableLanguagesAsVector(ITessAPI.TessBaseAPI handle) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int TessBaseAPIInitLangMod(ITessAPI.TessBaseAPI handle, String datapath, String language) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void TessBaseAPIInitForAnalysePage(ITessAPI.TessBaseAPI handle) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void TessBaseAPIReadConfigFile(ITessAPI.TessBaseAPI handle, String filename, int init_only) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void TessBaseAPISetPageSegMode(ITessAPI.TessBaseAPI handle, int mode) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int TessBaseAPIGetPageSegMode(ITessAPI.TessBaseAPI handle) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Pointer TessBaseAPIRect(ITessAPI.TessBaseAPI handle, ByteBuffer imagedata, int bytes_per_pixel, int bytes_per_line, int left, int top, int width, int height) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void TessBaseAPIClearAdaptiveClassifier(ITessAPI.TessBaseAPI handle) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void TessBaseAPISetImage(ITessAPI.TessBaseAPI handle, ByteBuffer imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void TessBaseAPISetImage2(ITessAPI.TessBaseAPI handle, Pix pix) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void TessBaseAPISetSourceResolution(ITessAPI.TessBaseAPI handle, int ppi) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void TessBaseAPISetRectangle(ITessAPI.TessBaseAPI handle, int left, int top, int width, int height) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Pix TessBaseAPIGetThresholdedImage(ITessAPI.TessBaseAPI handle) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Boxa TessBaseAPIGetRegions(ITessAPI.TessBaseAPI handle, PointerByReference pixa) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Boxa TessBaseAPIGetTextlines(ITessAPI.TessBaseAPI handle, PointerByReference pixa, PointerByReference blockids) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Boxa TessBaseAPIGetTextlines1(ITessAPI.TessBaseAPI handle, int raw_image, int raw_padding, PointerByReference pixa, PointerByReference blockids, PointerByReference paraids) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Boxa TessBaseAPIGetStrips(ITessAPI.TessBaseAPI handle, PointerByReference pixa, PointerByReference blockids) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Boxa TessBaseAPIGetWords(ITessAPI.TessBaseAPI handle, PointerByReference pixa) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Boxa TessBaseAPIGetConnectedComponents(ITessAPI.TessBaseAPI handle, PointerByReference cc) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Boxa TessBaseAPIGetComponentImages(ITessAPI.TessBaseAPI handle, int level, int text_only, PointerByReference pixa, PointerByReference blockids) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Boxa TessBaseAPIGetComponentImages1(ITessAPI.TessBaseAPI handle, int level, int text_only, int raw_image, int raw_padding, PointerByReference pixa, PointerByReference blockids, PointerByReference paraids) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int TessBaseAPIGetThresholdedImageScaleFactor(ITessAPI.TessBaseAPI handle) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void TessBaseAPIDumpPGM(ITessAPI.TessBaseAPI handle, String filename) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ITessAPI.TessPageIterator TessBaseAPIAnalyseLayout(ITessAPI.TessBaseAPI handle) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int TessBaseAPIRecognize(ITessAPI.TessBaseAPI handle, ITessAPI.ETEXT_DESC monitor) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int TessBaseAPIRecognizeForChopTest(ITessAPI.TessBaseAPI handle, ITessAPI.ETEXT_DESC monitor) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ITessAPI.TessResultIterator TessBaseAPIGetIterator(ITessAPI.TessBaseAPI handle) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ITessAPI.TessMutableIterator TessBaseAPIGetMutableIterator(ITessAPI.TessBaseAPI handle) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int TessBaseAPIProcessPages(ITessAPI.TessBaseAPI handle, String filename, String retry_config, int timeout_millisec, ITessAPI.TessResultRenderer renderer) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int TessBaseAPIProcessPage(ITessAPI.TessBaseAPI handle, Pix pix, int page_index, String filename, String retry_config, int timeout_millisec, ITessAPI.TessResultRenderer renderer) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Pointer TessBaseAPIGetUTF8Text(ITessAPI.TessBaseAPI handle) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Pointer TessBaseAPIGetHOCRText(ITessAPI.TessBaseAPI handle, int page_number) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Pointer TessBaseAPIGetBoxText(ITessAPI.TessBaseAPI handle, int page_number) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Pointer TessBaseAPIGetUNLVText(ITessAPI.TessBaseAPI handle) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int TessBaseAPIMeanTextConf(ITessAPI.TessBaseAPI handle) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IntByReference TessBaseAPIAllWordConfidences(ITessAPI.TessBaseAPI handle) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int TessBaseAPIAdaptToWordStr(ITessAPI.TessBaseAPI handle, int mode, String wordstr) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void TessBaseAPIClear(ITessAPI.TessBaseAPI handle) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void TessBaseAPIEnd(ITessAPI.TessBaseAPI handle) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int TessBaseAPIIsValidWord(ITessAPI.TessBaseAPI handle, String word) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int TessBaseAPIGetTextDirection(ITessAPI.TessBaseAPI handle, IntBuffer out_offset, FloatBuffer out_slope) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void TessBaseAPIClearPersistentCache(ITessAPI.TessBaseAPI handle) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int TessBaseAPIDetectOrientationScript(TessBaseAPI handle, IntBuffer orient_deg, FloatBuffer orient_conf, PointerByReference script_name, FloatBuffer script_conf) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String TessBaseAPIGetUnichar(ITessAPI.TessBaseAPI handle, int unichar_id) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void TessPageIteratorDelete(ITessAPI.TessPageIterator handle) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ITessAPI.TessPageIterator TessPageIteratorCopy(ITessAPI.TessPageIterator handle) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void TessPageIteratorBegin(ITessAPI.TessPageIterator handle) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int TessPageIteratorNext(ITessAPI.TessPageIterator handle, int level) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int TessPageIteratorIsAtBeginningOf(ITessAPI.TessPageIterator handle, int level) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int TessPageIteratorIsAtFinalElement(ITessAPI.TessPageIterator handle, int level, int element) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int TessPageIteratorBoundingBox(ITessAPI.TessPageIterator handle, int level, IntBuffer left, IntBuffer top, IntBuffer right, IntBuffer bottom) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int TessPageIteratorBlockType(ITessAPI.TessPageIterator handle) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Pix TessPageIteratorGetBinaryImage(ITessAPI.TessPageIterator handle, int level) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Pix TessPageIteratorGetImage(ITessAPI.TessPageIterator handle, int level, int padding, Pix original_image, IntBuffer left, IntBuffer top) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int TessPageIteratorBaseline(ITessAPI.TessPageIterator handle, int level, IntBuffer x1, IntBuffer y1, IntBuffer x2, IntBuffer y2) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void TessPageIteratorOrientation(ITessAPI.TessPageIterator handle, IntBuffer orientation, IntBuffer writing_direction, IntBuffer textline_order, FloatBuffer deskew_angle) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void TessPageIteratorParagraphInfo(ITessAPI.TessPageIterator handle, IntBuffer justification, IntBuffer is_list_item, IntBuffer is_crown, IntBuffer first_line_indent) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void TessResultIteratorDelete(ITessAPI.TessResultIterator handle) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ITessAPI.TessResultIterator TessResultIteratorCopy(ITessAPI.TessResultIterator handle) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ITessAPI.TessPageIterator TessResultIteratorGetPageIterator(ITessAPI.TessResultIterator handle) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ITessAPI.TessPageIterator TessResultIteratorGetPageIteratorConst(ITessAPI.TessResultIterator handle) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int TessResultIteratorNext(ITessAPI.TessResultIterator handle, int level) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Pointer TessResultIteratorGetUTF8Text(ITessAPI.TessResultIterator handle, int level) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float TessResultIteratorConfidence(ITessAPI.TessResultIterator handle, int level) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String TessResultIteratorWordRecognitionLanguage(ITessAPI.TessResultIterator handle) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String TessResultIteratorWordFontAttributes(ITessAPI.TessResultIterator handle, IntBuffer is_bold, IntBuffer is_italic, IntBuffer is_underlined, IntBuffer is_monospace, IntBuffer is_serif, IntBuffer is_smallcaps, IntBuffer pointsize, IntBuffer font_id) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int TessResultIteratorWordIsFromDictionary(ITessAPI.TessResultIterator handle) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int TessResultIteratorWordIsNumeric(ITessAPI.TessResultIterator handle) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int TessResultIteratorSymbolIsSuperscript(ITessAPI.TessResultIterator handle) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int TessResultIteratorSymbolIsSubscript(ITessAPI.TessResultIterator handle) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int TessResultIteratorSymbolIsDropcap(ITessAPI.TessResultIterator handle) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ITessAPI.TessChoiceIterator TessResultIteratorGetChoiceIterator(ITessAPI.TessResultIterator handle) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void TessChoiceIteratorDelete(ITessAPI.TessChoiceIterator handle) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int TessChoiceIteratorNext(ITessAPI.TessChoiceIterator handle) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String TessChoiceIteratorGetUTF8Text(ITessAPI.TessChoiceIterator handle) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float TessChoiceIteratorConfidence(ITessAPI.TessChoiceIterator handle) {
|
||||||
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
|
}
|
||||||
|
}
|
648
NGCC/Tess4J/test/net/sourceforge/tess4j/TessAPITest.java
Normal file
648
NGCC/Tess4J/test/net/sourceforge/tess4j/TessAPITest.java
Normal file
@ -0,0 +1,648 @@
|
|||||||
|
/**
|
||||||
|
* Copyright @ 2012 Quan Nguyen
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||||
|
* use this file except in compliance with the License. You may obtain a copy of
|
||||||
|
* the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations under
|
||||||
|
* the License.
|
||||||
|
*/
|
||||||
|
package net.sourceforge.tess4j;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertArrayEquals;
|
||||||
|
|
||||||
|
import java.awt.image.BufferedImage;
|
||||||
|
import java.io.BufferedReader;
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
import java.io.FileReader;
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
import java.nio.FloatBuffer;
|
||||||
|
import java.nio.IntBuffer;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
import javax.imageio.ImageIO;
|
||||||
|
|
||||||
|
import net.sourceforge.tess4j.util.ImageIOHelper;
|
||||||
|
import net.sourceforge.tess4j.util.LoggHelper;
|
||||||
|
import net.sourceforge.tess4j.util.Utils;
|
||||||
|
|
||||||
|
import org.junit.After;
|
||||||
|
import org.junit.AfterClass;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import com.ochafik.lang.jnaerator.runtime.NativeSize;
|
||||||
|
import com.sun.jna.NativeLong;
|
||||||
|
import com.sun.jna.Pointer;
|
||||||
|
import com.sun.jna.StringArray;
|
||||||
|
import com.sun.jna.ptr.PointerByReference;
|
||||||
|
import net.sourceforge.lept4j.Box;
|
||||||
|
import net.sourceforge.lept4j.Boxa;
|
||||||
|
import static net.sourceforge.lept4j.ILeptonica.L_CLONE;
|
||||||
|
import net.sourceforge.lept4j.Leptonica;
|
||||||
|
import net.sourceforge.lept4j.Pix;
|
||||||
|
import net.sourceforge.lept4j.util.LeptUtils;
|
||||||
|
|
||||||
|
import net.sourceforge.tess4j.ITessAPI.*;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import static net.sourceforge.tess4j.ITessAPI.FALSE;
|
||||||
|
import static net.sourceforge.tess4j.ITessAPI.TRUE;
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
|
public class TessAPITest {
|
||||||
|
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(new LoggHelper().toString());
|
||||||
|
private final String datapath = ".";
|
||||||
|
private final String testResourcesDataPath = "test/resources/test-data";
|
||||||
|
String language = "eng";
|
||||||
|
String expOCRResult = "The (quick) [brown] {fox} jumps!\nOver the $43,456.78 <lazy> #90 dog";
|
||||||
|
|
||||||
|
TessAPI api;
|
||||||
|
TessBaseAPI handle;
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public static void setUpClass() throws Exception {
|
||||||
|
}
|
||||||
|
|
||||||
|
@AfterClass
|
||||||
|
public static void tearDownClass() throws Exception {
|
||||||
|
}
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void setUp() {
|
||||||
|
api = new TessAPIImpl().getInstance();
|
||||||
|
handle = api.TessBaseAPICreate();
|
||||||
|
}
|
||||||
|
|
||||||
|
@After
|
||||||
|
public void tearDown() {
|
||||||
|
api.TessBaseAPIDelete(handle);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of TessBaseAPIRect method, of class TessAPI.
|
||||||
|
*
|
||||||
|
* @throws java.lang.Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testTessBaseAPIRect() throws Exception {
|
||||||
|
logger.info("TessBaseAPIRect");
|
||||||
|
String expResult = expOCRResult;
|
||||||
|
File tiff = new File(testResourcesDataPath, "eurotext.tif");
|
||||||
|
BufferedImage image = ImageIO.read(tiff); // require jai-imageio lib to read TIFF
|
||||||
|
ByteBuffer buf = ImageIOHelper.convertImageData(image);
|
||||||
|
int bpp = image.getColorModel().getPixelSize();
|
||||||
|
int bytespp = bpp / 8;
|
||||||
|
int bytespl = (int) Math.ceil(image.getWidth() * bpp / 8.0);
|
||||||
|
api.TessBaseAPIInit3(handle, datapath, language);
|
||||||
|
api.TessBaseAPISetPageSegMode(handle, TessPageSegMode.PSM_AUTO);
|
||||||
|
Pointer utf8Text = api.TessBaseAPIRect(handle, buf, bytespp, bytespl, 90, 50, 862, 614);
|
||||||
|
String result = utf8Text.getString(0);
|
||||||
|
api.TessDeleteText(utf8Text);
|
||||||
|
logger.info(result);
|
||||||
|
assertTrue(result.startsWith(expResult));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of TessBaseAPIGetUTF8Text method, of class TessAPI.
|
||||||
|
*
|
||||||
|
* @throws java.lang.Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testTessBaseAPIGetUTF8Text() throws Exception {
|
||||||
|
logger.info("TessBaseAPIGetUTF8Text");
|
||||||
|
String expResult = expOCRResult;
|
||||||
|
File tiff = new File(testResourcesDataPath, "eurotext.tif");
|
||||||
|
BufferedImage image = ImageIO.read(new FileInputStream(tiff)); // require jai-imageio lib to read TIFF
|
||||||
|
ByteBuffer buf = ImageIOHelper.convertImageData(image);
|
||||||
|
int bpp = image.getColorModel().getPixelSize();
|
||||||
|
int bytespp = bpp / 8;
|
||||||
|
int bytespl = (int) Math.ceil(image.getWidth() * bpp / 8.0);
|
||||||
|
api.TessBaseAPIInit3(handle, datapath, language);
|
||||||
|
api.TessBaseAPISetPageSegMode(handle, TessPageSegMode.PSM_AUTO);
|
||||||
|
api.TessBaseAPISetImage(handle, buf, image.getWidth(), image.getHeight(), bytespp, bytespl);
|
||||||
|
api.TessBaseAPISetRectangle(handle, 90, 50, 862, 614);
|
||||||
|
Pointer utf8Text = api.TessBaseAPIGetUTF8Text(handle);
|
||||||
|
String result = utf8Text.getString(0);
|
||||||
|
api.TessDeleteText(utf8Text);
|
||||||
|
logger.info(result);
|
||||||
|
assertTrue(result.startsWith(expResult));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of TessBaseAPIGetUTF8Text method, of class TessAPI.
|
||||||
|
*
|
||||||
|
* @throws java.lang.Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testTessBaseAPIGetUTF8Text_Pix() throws Exception {
|
||||||
|
logger.info("TessBaseAPIGetUTF8Text_Pix");
|
||||||
|
String expResult = expOCRResult;
|
||||||
|
File tiff = new File(testResourcesDataPath, "eurotext.tif");
|
||||||
|
Leptonica leptInstance = Leptonica.INSTANCE;
|
||||||
|
Pix pix = leptInstance.pixRead(tiff.getPath());
|
||||||
|
api.TessBaseAPIInit3(handle, datapath, language);
|
||||||
|
api.TessBaseAPISetImage2(handle, pix);
|
||||||
|
Pointer utf8Text = api.TessBaseAPIGetUTF8Text(handle);
|
||||||
|
String result = utf8Text.getString(0);
|
||||||
|
api.TessDeleteText(utf8Text);
|
||||||
|
logger.info(result);
|
||||||
|
|
||||||
|
//release Pix resource
|
||||||
|
PointerByReference pRef = new PointerByReference();
|
||||||
|
pRef.setValue(pix.getPointer());
|
||||||
|
leptInstance.pixDestroy(pRef);
|
||||||
|
|
||||||
|
assertTrue(result.startsWith(expResult));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of TessBaseAPIGetComponentImages method, of class TessAPI.
|
||||||
|
*
|
||||||
|
* @throws java.lang.Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testTessBaseAPIGetComponentImages() throws Exception {
|
||||||
|
logger.info("TessBaseAPIGetComponentImages");
|
||||||
|
File image = new File(testResourcesDataPath, "eurotext.png");
|
||||||
|
int expResult = 12; // number of lines in the test image
|
||||||
|
Leptonica leptInstance = Leptonica.INSTANCE;
|
||||||
|
Pix pix = leptInstance.pixRead(image.getPath());
|
||||||
|
api.TessBaseAPIInit3(handle, datapath, language);
|
||||||
|
api.TessBaseAPISetImage2(handle, pix);
|
||||||
|
PointerByReference pixa = null;
|
||||||
|
PointerByReference blockids = null;
|
||||||
|
Boxa boxes = api.TessBaseAPIGetComponentImages(handle, TessPageIteratorLevel.RIL_TEXTLINE, TRUE, pixa, blockids);
|
||||||
|
// boxes = api.TessBaseAPIGetRegions(handle, pixa); // equivalent to TessPageIteratorLevel.RIL_BLOCK
|
||||||
|
int boxCount = leptInstance.boxaGetCount(boxes);
|
||||||
|
for (int i = 0; i < boxCount; i++) {
|
||||||
|
Box box = leptInstance.boxaGetBox(boxes, i, L_CLONE);
|
||||||
|
if (box == null) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
api.TessBaseAPISetRectangle(handle, box.x, box.y, box.w, box.h);
|
||||||
|
Pointer utf8Text = api.TessBaseAPIGetUTF8Text(handle);
|
||||||
|
String ocrResult = utf8Text.getString(0);
|
||||||
|
api.TessDeleteText(utf8Text);
|
||||||
|
int conf = api.TessBaseAPIMeanTextConf(handle);
|
||||||
|
System.out.print(String.format("Box[%d]: x=%d, y=%d, w=%d, h=%d, confidence: %d, text: %s", i, box.x, box.y, box.w, box.h, conf, ocrResult));
|
||||||
|
LeptUtils.dispose(box);
|
||||||
|
}
|
||||||
|
|
||||||
|
// release Pix and Boxa resources
|
||||||
|
LeptUtils.dispose(pix);
|
||||||
|
LeptUtils.dispose(boxes);
|
||||||
|
|
||||||
|
assertEquals(expResult, boxCount);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of TessVersion method, of class TessAPI.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testTessVersion() {
|
||||||
|
logger.info("TessVersion");
|
||||||
|
String expResult = "3.05.01";
|
||||||
|
String result = api.TessVersion();
|
||||||
|
logger.info(result);
|
||||||
|
assertTrue(result.startsWith(expResult));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of TessBaseAPIGetBoolVariable method, of class TessAPI.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testTessBaseAPIGetBoolVariable() {
|
||||||
|
logger.info("TessBaseAPIGetBoolVariable");
|
||||||
|
String name = "tessedit_create_hocr";
|
||||||
|
api.TessBaseAPISetVariable(handle, name, "1");
|
||||||
|
IntBuffer value = IntBuffer.allocate(1);
|
||||||
|
int result = -1;
|
||||||
|
if (api.TessBaseAPIGetBoolVariable(handle, "tessedit_create_hocr", value) == TRUE) {
|
||||||
|
result = value.get(0);
|
||||||
|
}
|
||||||
|
int expResult = 1;
|
||||||
|
assertEquals(expResult, result);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of TessBaseAPIPrintVariables method, of class TessAPI.
|
||||||
|
*
|
||||||
|
* @throws java.lang.Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testTessBaseAPIPrintVariablesToFile() throws Exception {
|
||||||
|
logger.info("TessBaseAPIPrintVariablesToFile");
|
||||||
|
String var = "tessedit_char_whitelist";
|
||||||
|
String value = "0123456789";
|
||||||
|
api.TessBaseAPISetVariable(handle, var, value);
|
||||||
|
String filename = "printvar.txt";
|
||||||
|
api.TessBaseAPIPrintVariablesToFile(handle, filename); // will crash if not invoked after some method
|
||||||
|
File file = new File(filename);
|
||||||
|
BufferedReader input = new BufferedReader(new FileReader(file));
|
||||||
|
StringBuilder strB = new StringBuilder();
|
||||||
|
String line;
|
||||||
|
String EOL = System.getProperty("line.separator");
|
||||||
|
while ((line = input.readLine()) != null) {
|
||||||
|
strB.append(line).append(EOL);
|
||||||
|
}
|
||||||
|
input.close();
|
||||||
|
file.delete();
|
||||||
|
assertTrue(strB.toString().contains(var + "\t" + value));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of TessBaseAPIInit4 method, of class TessAPI.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testTessBaseAPIInit4() {
|
||||||
|
logger.info("TessBaseAPIInit4");
|
||||||
|
int oem = TessOcrEngineMode.OEM_DEFAULT;
|
||||||
|
PointerByReference configs = null; //new PointerByReference();
|
||||||
|
int configs_size = 0;
|
||||||
|
|
||||||
|
// disable loading dictionaries
|
||||||
|
String[] args = new String[]{"load_system_dawg", "load_freq_dawg"};
|
||||||
|
StringArray sarray = new StringArray(args);
|
||||||
|
PointerByReference vars_vec = new PointerByReference();
|
||||||
|
vars_vec.setPointer(sarray);
|
||||||
|
|
||||||
|
args = new String[]{"F", "F"};
|
||||||
|
sarray = new StringArray(args);
|
||||||
|
PointerByReference vars_values = new PointerByReference();
|
||||||
|
vars_values.setPointer(sarray);
|
||||||
|
|
||||||
|
NativeSize vars_vec_size = new NativeSize(args.length);
|
||||||
|
|
||||||
|
int expResult = 0;
|
||||||
|
int result = api.TessBaseAPIInit4(handle, datapath, language, oem, configs, configs_size, vars_vec, vars_values, vars_vec_size, FALSE);
|
||||||
|
assertEquals(expResult, result);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of TessBaseAPIGetInitLanguagesAsString method, of class TessAPI.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testTessBaseAPIGetInitLanguagesAsString() {
|
||||||
|
logger.info("TessBaseAPIGetInitLanguagesAsString");
|
||||||
|
String expResult = "";
|
||||||
|
String result = api.TessBaseAPIGetInitLanguagesAsString(handle);
|
||||||
|
assertEquals(expResult, result);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of TessBaseAPIGetLoadedLanguagesAsVector method, of class TessAPI.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testTessBaseAPIGetLoadedLanguagesAsVector() {
|
||||||
|
logger.info("TessBaseAPIGetLoadedLanguagesAsVector");
|
||||||
|
api.TessBaseAPIInit3(handle, datapath, language);
|
||||||
|
String[] expResult = {"eng"};
|
||||||
|
String[] result = api.TessBaseAPIGetLoadedLanguagesAsVector(handle).getPointer().getStringArray(0);
|
||||||
|
assertArrayEquals(expResult, result);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of TessBaseAPIGetAvailableLanguagesAsVector method, of class
|
||||||
|
* TessAPI.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testTessBaseAPIGetAvailableLanguagesAsVector() {
|
||||||
|
logger.info("TessBaseAPIGetAvailableLanguagesAsVector");
|
||||||
|
api.TessBaseAPIInit3(handle, datapath, language);
|
||||||
|
String[] expResult = {"eng"};
|
||||||
|
String[] result = api.TessBaseAPIGetAvailableLanguagesAsVector(handle).getPointer().getStringArray(0);
|
||||||
|
assertTrue(Arrays.asList(result).containsAll(Arrays.asList(expResult)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of TessBaseAPIGetHOCRText method, of class TessAPI.
|
||||||
|
*
|
||||||
|
* @throws java.lang.Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testTessBaseAPIGetHOCRText() throws Exception {
|
||||||
|
logger.info("TessBaseAPIGetHOCRText");
|
||||||
|
File tiff = new File(testResourcesDataPath, "eurotext.tif");
|
||||||
|
BufferedImage image = ImageIO.read(new FileInputStream(tiff)); // require jai-imageio lib to read TIFF
|
||||||
|
ByteBuffer buf = ImageIOHelper.convertImageData(image);
|
||||||
|
int bpp = image.getColorModel().getPixelSize();
|
||||||
|
int bytespp = bpp / 8;
|
||||||
|
int bytespl = (int) Math.ceil(image.getWidth() * bpp / 8.0);
|
||||||
|
api.TessBaseAPISetPageSegMode(handle, TessPageSegMode.PSM_AUTO);
|
||||||
|
api.TessBaseAPIInit3(handle, datapath, language);
|
||||||
|
api.TessBaseAPISetImage(handle, buf, image.getWidth(), image.getHeight(), bytespp, bytespl);
|
||||||
|
int page_number = 0;
|
||||||
|
Pointer utf8Text = api.TessBaseAPIGetHOCRText(handle, page_number);
|
||||||
|
String result = utf8Text.getString(0);
|
||||||
|
api.TessDeleteText(utf8Text);
|
||||||
|
assertTrue(result.contains("<div class='ocr_page'"));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of TessBaseAPIAnalyseLayout method, of class TessAPI.
|
||||||
|
*
|
||||||
|
* @throws java.lang.Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testTessBaseAPIAnalyseLayout() throws Exception {
|
||||||
|
logger.info("TessBaseAPIAnalyseLayout");
|
||||||
|
File image = new File(testResourcesDataPath, "eurotext.png");
|
||||||
|
int expResult = 12; // number of lines in the test image
|
||||||
|
Leptonica leptInstance = Leptonica.INSTANCE;
|
||||||
|
Pix pix = leptInstance.pixRead(image.getPath());
|
||||||
|
api.TessBaseAPIInit3(handle, datapath, language);
|
||||||
|
api.TessBaseAPISetImage2(handle, pix);
|
||||||
|
int pageIteratorLevel = TessPageIteratorLevel.RIL_TEXTLINE;
|
||||||
|
logger.info("PageIteratorLevel: " + Utils.getConstantName(pageIteratorLevel, TessPageIteratorLevel.class));
|
||||||
|
int i = 0;
|
||||||
|
TessPageIterator pi = api.TessBaseAPIAnalyseLayout(handle);
|
||||||
|
|
||||||
|
do {
|
||||||
|
IntBuffer leftB = IntBuffer.allocate(1);
|
||||||
|
IntBuffer topB = IntBuffer.allocate(1);
|
||||||
|
IntBuffer rightB = IntBuffer.allocate(1);
|
||||||
|
IntBuffer bottomB = IntBuffer.allocate(1);
|
||||||
|
api.TessPageIteratorBoundingBox(pi, pageIteratorLevel, leftB, topB, rightB, bottomB);
|
||||||
|
int left = leftB.get();
|
||||||
|
int top = topB.get();
|
||||||
|
int right = rightB.get();
|
||||||
|
int bottom = bottomB.get();
|
||||||
|
logger.info(String.format("Box[%d]: x=%d, y=%d, w=%d, h=%d", i++, left, top, right - left, bottom - top));
|
||||||
|
} while (api.TessPageIteratorNext(pi, pageIteratorLevel) == TRUE);
|
||||||
|
api.TessPageIteratorDelete(pi);
|
||||||
|
assertEquals(expResult, i);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of TessBaseAPIDetectOrientationScript method, of class TessAPI.
|
||||||
|
*
|
||||||
|
* @throws java.lang.Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testTessBaseAPIDetectOrientationScript() throws Exception {
|
||||||
|
logger.info("TessBaseAPIDetectOrientationScript");
|
||||||
|
File image = new File(testResourcesDataPath, "eurotext.png");
|
||||||
|
int expResult = TRUE;
|
||||||
|
Leptonica leptInstance = Leptonica.INSTANCE;
|
||||||
|
Pix pix = leptInstance.pixRead(image.getPath());
|
||||||
|
api.TessBaseAPIInit3(handle, datapath, language);
|
||||||
|
api.TessBaseAPISetImage2(handle, pix);
|
||||||
|
|
||||||
|
IntBuffer orient_degB = IntBuffer.allocate(1);
|
||||||
|
FloatBuffer orient_confB = FloatBuffer.allocate(1);
|
||||||
|
PointerByReference script_nameB = new PointerByReference();
|
||||||
|
FloatBuffer script_confB = FloatBuffer.allocate(1);
|
||||||
|
|
||||||
|
int result = api.TessBaseAPIDetectOrientationScript(handle, orient_degB, orient_confB, script_nameB, script_confB);
|
||||||
|
if (result == TRUE) {
|
||||||
|
int orient_deg = orient_degB.get();
|
||||||
|
float orient_conf = orient_confB.get();
|
||||||
|
String script_name = script_nameB.getValue().getString(0);
|
||||||
|
float script_conf = script_confB.get();
|
||||||
|
logger.info(String.format("OrientationScript: orient_deg=%d, orient_conf=%f, script_name=%s, script_conf=%f", orient_deg, orient_conf, script_name, script_conf));
|
||||||
|
}
|
||||||
|
|
||||||
|
PointerByReference pRef = new PointerByReference();
|
||||||
|
pRef.setValue(pix.getPointer());
|
||||||
|
leptInstance.pixDestroy(pRef);
|
||||||
|
assertEquals(expResult, result);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of Orientation and script detection (OSD).
|
||||||
|
*
|
||||||
|
* @throws java.lang.Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testOSD() throws Exception {
|
||||||
|
logger.info("OSD");
|
||||||
|
int expResult = TessPageSegMode.PSM_AUTO_OSD;
|
||||||
|
IntBuffer orientation = IntBuffer.allocate(1);
|
||||||
|
IntBuffer direction = IntBuffer.allocate(1);
|
||||||
|
IntBuffer order = IntBuffer.allocate(1);
|
||||||
|
FloatBuffer deskew_angle = FloatBuffer.allocate(1);
|
||||||
|
File tiff = new File(testResourcesDataPath, "eurotext.tif");
|
||||||
|
BufferedImage image = ImageIO.read(new FileInputStream(tiff)); // require jai-imageio lib to read TIFF
|
||||||
|
ByteBuffer buf = ImageIOHelper.convertImageData(image);
|
||||||
|
int bpp = image.getColorModel().getPixelSize();
|
||||||
|
int bytespp = bpp / 8;
|
||||||
|
int bytespl = (int) Math.ceil(image.getWidth() * bpp / 8.0);
|
||||||
|
api.TessBaseAPIInit3(handle, datapath, language);
|
||||||
|
api.TessBaseAPISetPageSegMode(handle, TessPageSegMode.PSM_AUTO_OSD);
|
||||||
|
int actualResult = api.TessBaseAPIGetPageSegMode(handle);
|
||||||
|
logger.info("PSM: " + Utils.getConstantName(actualResult, TessPageSegMode.class));
|
||||||
|
api.TessBaseAPISetImage(handle, buf, image.getWidth(), image.getHeight(), bytespp, bytespl);
|
||||||
|
int success = api.TessBaseAPIRecognize(handle, null);
|
||||||
|
if (success == 0) {
|
||||||
|
TessPageIterator pi = api.TessBaseAPIAnalyseLayout(handle);
|
||||||
|
api.TessPageIteratorOrientation(pi, orientation, direction, order, deskew_angle);
|
||||||
|
logger.info(String.format(
|
||||||
|
"Orientation: %s\nWritingDirection: %s\nTextlineOrder: %s\nDeskew angle: %.4f\n",
|
||||||
|
Utils.getConstantName(orientation.get(), TessOrientation.class),
|
||||||
|
Utils.getConstantName(direction.get(), TessWritingDirection.class),
|
||||||
|
Utils.getConstantName(order.get(), TessTextlineOrder.class),
|
||||||
|
deskew_angle.get()));
|
||||||
|
}
|
||||||
|
assertEquals(expResult, actualResult);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of ResultIterator and PageIterator.
|
||||||
|
*
|
||||||
|
* @throws Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testResultIterator() throws Exception {
|
||||||
|
logger.info("TessBaseAPIGetIterator");
|
||||||
|
File tiff = new File(testResourcesDataPath, "eurotext.tif");
|
||||||
|
BufferedImage image = ImageIO.read(new FileInputStream(tiff)); // require jai-imageio lib to read TIFF
|
||||||
|
ByteBuffer buf = ImageIOHelper.convertImageData(image);
|
||||||
|
int bpp = image.getColorModel().getPixelSize();
|
||||||
|
int bytespp = bpp / 8;
|
||||||
|
int bytespl = (int) Math.ceil(image.getWidth() * bpp / 8.0);
|
||||||
|
api.TessBaseAPIInit3(handle, datapath, language);
|
||||||
|
api.TessBaseAPISetPageSegMode(handle, TessPageSegMode.PSM_AUTO);
|
||||||
|
api.TessBaseAPISetImage(handle, buf, image.getWidth(), image.getHeight(), bytespp, bytespl);
|
||||||
|
ETEXT_DESC monitor = new ETEXT_DESC();
|
||||||
|
TimeVal timeout = new TimeVal();
|
||||||
|
timeout.tv_sec = new NativeLong(0L); // time > 0 causes blank ouput
|
||||||
|
monitor.end_time = timeout;
|
||||||
|
ProgressMonitor pmo = new ProgressMonitor(monitor);
|
||||||
|
pmo.start();
|
||||||
|
api.TessBaseAPIRecognize(handle, monitor);
|
||||||
|
logger.info("Message: " + pmo.getMessage());
|
||||||
|
TessResultIterator ri = api.TessBaseAPIGetIterator(handle);
|
||||||
|
TessPageIterator pi = api.TessResultIteratorGetPageIterator(ri);
|
||||||
|
api.TessPageIteratorBegin(pi);
|
||||||
|
logger.info("Bounding boxes:\nchar(s) left top right bottom confidence font-attributes");
|
||||||
|
int level = TessPageIteratorLevel.RIL_WORD;
|
||||||
|
|
||||||
|
// int height = image.getHeight();
|
||||||
|
do {
|
||||||
|
Pointer ptr = api.TessResultIteratorGetUTF8Text(ri, level);
|
||||||
|
String word = ptr.getString(0);
|
||||||
|
api.TessDeleteText(ptr);
|
||||||
|
float confidence = api.TessResultIteratorConfidence(ri, level);
|
||||||
|
IntBuffer leftB = IntBuffer.allocate(1);
|
||||||
|
IntBuffer topB = IntBuffer.allocate(1);
|
||||||
|
IntBuffer rightB = IntBuffer.allocate(1);
|
||||||
|
IntBuffer bottomB = IntBuffer.allocate(1);
|
||||||
|
api.TessPageIteratorBoundingBox(pi, level, leftB, topB, rightB, bottomB);
|
||||||
|
int left = leftB.get();
|
||||||
|
int top = topB.get();
|
||||||
|
int right = rightB.get();
|
||||||
|
int bottom = bottomB.get();
|
||||||
|
System.out.print(String.format("%s %d %d %d %d %f", word, left, top, right, bottom, confidence));
|
||||||
|
// logger.info(String.format("%s %d %d %d %d", str, left, height - bottom, right, height - top)); //
|
||||||
|
// training box coordinates
|
||||||
|
|
||||||
|
IntBuffer boldB = IntBuffer.allocate(1);
|
||||||
|
IntBuffer italicB = IntBuffer.allocate(1);
|
||||||
|
IntBuffer underlinedB = IntBuffer.allocate(1);
|
||||||
|
IntBuffer monospaceB = IntBuffer.allocate(1);
|
||||||
|
IntBuffer serifB = IntBuffer.allocate(1);
|
||||||
|
IntBuffer smallcapsB = IntBuffer.allocate(1);
|
||||||
|
IntBuffer pointSizeB = IntBuffer.allocate(1);
|
||||||
|
IntBuffer fontIdB = IntBuffer.allocate(1);
|
||||||
|
String fontName = api.TessResultIteratorWordFontAttributes(ri, boldB, italicB, underlinedB, monospaceB,
|
||||||
|
serifB, smallcapsB, pointSizeB, fontIdB);
|
||||||
|
boolean bold = boldB.get() == TRUE;
|
||||||
|
boolean italic = italicB.get() == TRUE;
|
||||||
|
boolean underlined = underlinedB.get() == TRUE;
|
||||||
|
boolean monospace = monospaceB.get() == TRUE;
|
||||||
|
boolean serif = serifB.get() == TRUE;
|
||||||
|
boolean smallcaps = smallcapsB.get() == TRUE;
|
||||||
|
int pointSize = pointSizeB.get();
|
||||||
|
int fontId = fontIdB.get();
|
||||||
|
logger.info(String.format(" font: %s, size: %d, font id: %d, bold: %b,"
|
||||||
|
+ " italic: %b, underlined: %b, monospace: %b, serif: %b, smallcap: %b", fontName, pointSize,
|
||||||
|
fontId, bold, italic, underlined, monospace, serif, smallcaps));
|
||||||
|
} while (api.TessPageIteratorNext(pi, level) == TRUE);
|
||||||
|
|
||||||
|
assertTrue(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of ChoiceIterator.
|
||||||
|
*
|
||||||
|
* @throws Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testChoiceIterator() throws Exception {
|
||||||
|
logger.info("TessResultIteratorGetChoiceIterator");
|
||||||
|
File tiff = new File(testResourcesDataPath, "eurotext.tif");
|
||||||
|
BufferedImage image = ImageIO.read(new FileInputStream(tiff)); // require jai-imageio lib to read TIFF
|
||||||
|
ByteBuffer buf = ImageIOHelper.convertImageData(image);
|
||||||
|
int bpp = image.getColorModel().getPixelSize();
|
||||||
|
int bytespp = bpp / 8;
|
||||||
|
int bytespl = (int) Math.ceil(image.getWidth() * bpp / 8.0);
|
||||||
|
api.TessBaseAPIInit3(handle, datapath, language);
|
||||||
|
api.TessBaseAPISetImage(handle, buf, image.getWidth(), image.getHeight(), bytespp, bytespl);
|
||||||
|
api.TessBaseAPISetVariable(handle, "save_blob_choices", "T");
|
||||||
|
api.TessBaseAPISetRectangle(handle, 37, 228, 548, 31);
|
||||||
|
ETEXT_DESC monitor = new ETEXT_DESC();
|
||||||
|
ProgressMonitor pmo = new ProgressMonitor(monitor);
|
||||||
|
pmo.start();
|
||||||
|
api.TessBaseAPIRecognize(handle, monitor);
|
||||||
|
logger.info("Message: " + pmo.getMessage());
|
||||||
|
TessResultIterator ri = api.TessBaseAPIGetIterator(handle);
|
||||||
|
int level = TessPageIteratorLevel.RIL_SYMBOL;
|
||||||
|
|
||||||
|
if (ri != null) {
|
||||||
|
do {
|
||||||
|
Pointer symbol = api.TessResultIteratorGetUTF8Text(ri, level);
|
||||||
|
float conf = api.TessResultIteratorConfidence(ri, level);
|
||||||
|
if (symbol != null) {
|
||||||
|
logger.info(String.format("symbol %s, conf: %f", symbol.getString(0), conf));
|
||||||
|
boolean indent = false;
|
||||||
|
TessChoiceIterator ci = api.TessResultIteratorGetChoiceIterator(ri);
|
||||||
|
do {
|
||||||
|
if (indent) {
|
||||||
|
System.out.print("\t");
|
||||||
|
}
|
||||||
|
System.out.print("\t- ");
|
||||||
|
String choice = api.TessChoiceIteratorGetUTF8Text(ci);
|
||||||
|
logger.info(String.format("%s conf: %f", choice, api.TessChoiceIteratorConfidence(ci)));
|
||||||
|
indent = true;
|
||||||
|
} while (api.TessChoiceIteratorNext(ci) == ITessAPI.TRUE);
|
||||||
|
api.TessChoiceIteratorDelete(ci);
|
||||||
|
}
|
||||||
|
logger.info("---------------------------------------------");
|
||||||
|
api.TessDeleteText(symbol);
|
||||||
|
} while (api.TessResultIteratorNext(ri, level) == ITessAPI.TRUE);
|
||||||
|
}
|
||||||
|
|
||||||
|
assertTrue(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of ResultRenderer method, of class TessAPI.
|
||||||
|
*
|
||||||
|
* @throws java.lang.Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testResultRenderer() throws Exception {
|
||||||
|
logger.info("TessResultRenderer");
|
||||||
|
String image = String.format("%s/%s", testResourcesDataPath, "eurotext.tif");
|
||||||
|
String output = "capi-test.txt";
|
||||||
|
int set_only_init_params = FALSE;
|
||||||
|
int oem = TessOcrEngineMode.OEM_DEFAULT;
|
||||||
|
PointerByReference configs = null;
|
||||||
|
int configs_size = 0;
|
||||||
|
|
||||||
|
String[] params = {"load_system_dawg", "tessedit_char_whitelist"};
|
||||||
|
String vals[] = {"F", ""}; //0123456789-.IThisalotfpnex
|
||||||
|
PointerByReference vars_vec = new PointerByReference();
|
||||||
|
vars_vec.setPointer(new StringArray(params));
|
||||||
|
PointerByReference vars_values = new PointerByReference();
|
||||||
|
vars_values.setPointer(new StringArray(vals));
|
||||||
|
NativeSize vars_vec_size = new NativeSize(params.length);
|
||||||
|
|
||||||
|
api.TessBaseAPISetOutputName(handle, output);
|
||||||
|
|
||||||
|
int rc = api.TessBaseAPIInit4(handle, datapath, language,
|
||||||
|
oem, configs, configs_size, vars_vec, vars_values, vars_vec_size, set_only_init_params);
|
||||||
|
|
||||||
|
if (rc != 0) {
|
||||||
|
api.TessBaseAPIDelete(handle);
|
||||||
|
logger.error("Could not initialize tesseract.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
String outputbase = "test/test-results/outputbase";
|
||||||
|
TessResultRenderer renderer = api.TessHOcrRendererCreate(outputbase);
|
||||||
|
api.TessResultRendererInsert(renderer, api.TessBoxTextRendererCreate(outputbase));
|
||||||
|
api.TessResultRendererInsert(renderer, api.TessTextRendererCreate(outputbase));
|
||||||
|
String dataPath = api.TessBaseAPIGetDatapath(handle);
|
||||||
|
api.TessResultRendererInsert(renderer, api.TessPDFRendererCreate(outputbase, dataPath));
|
||||||
|
int result = api.TessBaseAPIProcessPages(handle, image, null, 0, renderer);
|
||||||
|
|
||||||
|
if (result == FALSE) {
|
||||||
|
logger.error("Error during processing.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (; renderer != null; renderer = api.TessResultRendererNext(renderer)) {
|
||||||
|
String ext = api.TessResultRendererExtention(renderer).getString(0);
|
||||||
|
logger.info(String.format("TessResultRendererExtention: %s\nTessResultRendererTitle: %s\nTessResultRendererImageNum: %d",
|
||||||
|
ext,
|
||||||
|
api.TessResultRendererTitle(renderer).getString(0),
|
||||||
|
api.TessResultRendererImageNum(renderer)));
|
||||||
|
}
|
||||||
|
|
||||||
|
api.TessDeleteResultRenderer(renderer);
|
||||||
|
assertTrue(new File(outputbase + ".pdf").exists());
|
||||||
|
}
|
||||||
|
}
|
267
NGCC/Tess4J/test/net/sourceforge/tess4j/Tesseract1Test.java
Normal file
267
NGCC/Tess4J/test/net/sourceforge/tess4j/Tesseract1Test.java
Normal file
@ -0,0 +1,267 @@
|
|||||||
|
/**
|
||||||
|
* Copyright @ 2010 Quan Nguyen
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||||
|
* use this file except in compliance with the License. You may obtain a copy of
|
||||||
|
* the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations under
|
||||||
|
* the License.
|
||||||
|
*/
|
||||||
|
package net.sourceforge.tess4j;
|
||||||
|
|
||||||
|
import java.awt.Rectangle;
|
||||||
|
import java.awt.image.BufferedImage;
|
||||||
|
import java.io.File;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
import javax.imageio.IIOImage;
|
||||||
|
import javax.imageio.ImageIO;
|
||||||
|
|
||||||
|
import net.sourceforge.tess4j.util.LoggHelper;
|
||||||
|
import net.sourceforge.tess4j.util.Utils;
|
||||||
|
import net.sourceforge.tess4j.util.ImageHelper;
|
||||||
|
import net.sourceforge.tess4j.util.ImageIOHelper;
|
||||||
|
import net.sourceforge.tess4j.ITesseract.RenderedFormat;
|
||||||
|
import net.sourceforge.tess4j.ITessAPI.TessPageIteratorLevel;
|
||||||
|
|
||||||
|
import com.recognition.software.jdeskew.ImageDeskew;
|
||||||
|
|
||||||
|
import org.junit.After;
|
||||||
|
import org.junit.AfterClass;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertArrayEquals;
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
|
public class Tesseract1Test {
|
||||||
|
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(new LoggHelper().toString());
|
||||||
|
static final double MINIMUM_DESKEW_THRESHOLD = 0.05d;
|
||||||
|
ITesseract instance;
|
||||||
|
|
||||||
|
private final String datapath = ".";
|
||||||
|
private final String testResourcesDataPath = "test/resources/test-data";
|
||||||
|
private final String expOCRResult = "The (quick) [brown] {fox} jumps!\nOver the $43,456.78 <lazy> #90 dog";
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public static void setUpClass() throws Exception {
|
||||||
|
}
|
||||||
|
|
||||||
|
@AfterClass
|
||||||
|
public static void tearDownClass() throws Exception {
|
||||||
|
}
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void setUp() {
|
||||||
|
instance = new Tesseract1();
|
||||||
|
instance.setDatapath(new File(datapath).getPath());
|
||||||
|
}
|
||||||
|
|
||||||
|
@After
|
||||||
|
public void tearDown() {
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of doOCR method, of class Tesseract1.
|
||||||
|
*
|
||||||
|
* @throws java.lang.Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testDoOCR_File() throws Exception {
|
||||||
|
logger.info("doOCR on a PNG image");
|
||||||
|
File imageFile = new File(testResourcesDataPath, "eurotext.png");
|
||||||
|
String expResult = expOCRResult;
|
||||||
|
String result = instance.doOCR(imageFile);
|
||||||
|
logger.info(result);
|
||||||
|
assertEquals(expResult, result.substring(0, expResult.length()));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of doOCR method, of class Tesseract.
|
||||||
|
*
|
||||||
|
* @throws java.lang.Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testDoOCR_UNLV_Zone_File() throws Exception {
|
||||||
|
logger.info("doOCR on a PNG image with UNLV zone file .uzn");
|
||||||
|
//UNLV zone format: left top width height label
|
||||||
|
File imageFile = new File(testResourcesDataPath, "eurotext_unlv.png");
|
||||||
|
String expResult = "& duck/goose, as 12.5% of E-mail\n\n"
|
||||||
|
+ "from aspammer@website.com is spam.\n\n"
|
||||||
|
+ "The (quick) [brown] {fox} jumps!\n"
|
||||||
|
+ "Over the $43,456.78 <lazy> #90 dog";
|
||||||
|
String result = instance.doOCR(imageFile);
|
||||||
|
logger.info(result);
|
||||||
|
assertEquals(expResult, result.trim());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of doOCR method, of class Tesseract.
|
||||||
|
*
|
||||||
|
* @throws java.lang.Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testDoOCR_File_With_Configs() throws Exception {
|
||||||
|
logger.info("doOCR with configs");
|
||||||
|
File imageFile = new File(testResourcesDataPath, "eurotext.png");
|
||||||
|
String expResult = "[-0123456789.\n ]+";
|
||||||
|
List<String> configs = Arrays.asList("digits");
|
||||||
|
instance.setConfigs(configs);
|
||||||
|
String result = instance.doOCR(imageFile);
|
||||||
|
logger.info(result);
|
||||||
|
assertTrue(result.matches(expResult));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of doOCR method, of class Tesseract1.
|
||||||
|
*
|
||||||
|
* @throws java.lang.Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testDoOCR_File_Rectangle() throws Exception {
|
||||||
|
logger.info("doOCR on a BMP image with bounding rectangle");
|
||||||
|
File imageFile = new File(testResourcesDataPath, "eurotext.bmp");
|
||||||
|
Rectangle rect = new Rectangle(0, 0, 1024, 800); // define an equal or smaller region of interest on the image
|
||||||
|
String expResult = expOCRResult;
|
||||||
|
String result = instance.doOCR(imageFile, rect);
|
||||||
|
logger.info(result);
|
||||||
|
assertEquals(expResult, result.substring(0, expResult.length()));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of doOCR method, of class Tesseract1.
|
||||||
|
*
|
||||||
|
* @throws java.lang.Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testDoOCR_PDF() throws Exception {
|
||||||
|
logger.info("doOCR on a PDF document");
|
||||||
|
File imageFile = new File(testResourcesDataPath, "eurotext.pdf");
|
||||||
|
List<IIOImage> imageList = ImageIOHelper.getIIOImageList(imageFile);
|
||||||
|
String expResult = expOCRResult;
|
||||||
|
String result = instance.doOCR(imageList, null);
|
||||||
|
logger.info(result);
|
||||||
|
assertEquals(expResult, result.substring(0, expResult.length()));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of doOCR method, of class Tesseract1.
|
||||||
|
*
|
||||||
|
* @throws java.lang.Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testDoOCR_BufferedImage() throws Exception {
|
||||||
|
logger.info("doOCR on a buffered image of a PNG");
|
||||||
|
File imageFile = new File(testResourcesDataPath, "eurotext.png");
|
||||||
|
BufferedImage bi = ImageIO.read(imageFile);
|
||||||
|
String expResult = expOCRResult;
|
||||||
|
String result = instance.doOCR(bi);
|
||||||
|
logger.info(result);
|
||||||
|
assertEquals(expResult, result.substring(0, expResult.length()));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of deskew algorithm.
|
||||||
|
*
|
||||||
|
* @throws java.lang.Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testDoOCR_SkewedImage() throws Exception {
|
||||||
|
logger.info("doOCR on a skewed PNG image");
|
||||||
|
File imageFile = new File(testResourcesDataPath, "eurotext_deskew.png");
|
||||||
|
BufferedImage bi = ImageIO.read(imageFile);
|
||||||
|
ImageDeskew id = new ImageDeskew(bi);
|
||||||
|
double imageSkewAngle = id.getSkewAngle(); // determine skew angle
|
||||||
|
if ((imageSkewAngle > MINIMUM_DESKEW_THRESHOLD || imageSkewAngle < -(MINIMUM_DESKEW_THRESHOLD))) {
|
||||||
|
bi = ImageHelper.rotateImage(bi, -imageSkewAngle); // deskew image
|
||||||
|
}
|
||||||
|
|
||||||
|
String expResult = expOCRResult;
|
||||||
|
String result = instance.doOCR(bi);
|
||||||
|
logger.info(result);
|
||||||
|
assertEquals(expResult, result.substring(0, expResult.length()));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of createDocuments method, of class Tesseract.
|
||||||
|
*
|
||||||
|
* @throws java.lang.Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testCreateDocuments() throws Exception {
|
||||||
|
logger.info("createDocuments for an image");
|
||||||
|
File imageFile1 = new File(testResourcesDataPath, "eurotext.pdf");
|
||||||
|
File imageFile2 = new File(testResourcesDataPath, "eurotext.png");
|
||||||
|
String outputbase1 = "test/test-results/docrenderer1-1";
|
||||||
|
String outputbase2 = "test/test-results/docrenderer1-2";
|
||||||
|
List<RenderedFormat> formats = new ArrayList<RenderedFormat>(Arrays.asList(RenderedFormat.HOCR, RenderedFormat.PDF, RenderedFormat.TEXT));
|
||||||
|
instance.createDocuments(new String[]{imageFile1.getPath(), imageFile2.getPath()}, new String[]{outputbase1, outputbase2}, formats);
|
||||||
|
assertTrue(new File(outputbase1 + ".pdf").exists());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of getWords method, of class Tesseract1.
|
||||||
|
*
|
||||||
|
* @throws java.lang.Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testGetWords() throws Exception {
|
||||||
|
logger.info("getWords");
|
||||||
|
File imageFile = new File(testResourcesDataPath, "eurotext.tif");
|
||||||
|
|
||||||
|
String expResult = "The (quick) [brown] {fox} jumps!\nOver the $43,456.78 <lazy> #90 dog";
|
||||||
|
String[] expResults = expResult.split("\\s");
|
||||||
|
|
||||||
|
int pageIteratorLevel = TessPageIteratorLevel.RIL_WORD;
|
||||||
|
logger.info("PageIteratorLevel: " + Utils.getConstantName(pageIteratorLevel, TessPageIteratorLevel.class));
|
||||||
|
BufferedImage bi = ImageIO.read(imageFile);
|
||||||
|
List<Word> result = instance.getWords(bi, pageIteratorLevel);
|
||||||
|
|
||||||
|
// print the complete result
|
||||||
|
for (Word word : result) {
|
||||||
|
logger.info(word.toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
List<String> text = new ArrayList<String>();
|
||||||
|
for (Word word : result.subList(0, expResults.length)) {
|
||||||
|
text.add(word.getText());
|
||||||
|
}
|
||||||
|
|
||||||
|
assertArrayEquals(expResults, text.toArray());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of getSegmentedRegions method, of class Tesseract1.
|
||||||
|
*
|
||||||
|
* @throws java.lang.Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testGetSegmentedRegions() throws Exception {
|
||||||
|
logger.info("getSegmentedRegions at given TessPageIteratorLevel");
|
||||||
|
File imageFile = new File(testResourcesDataPath, "eurotext.png");
|
||||||
|
BufferedImage bi = ImageIO.read(imageFile);
|
||||||
|
int level = TessPageIteratorLevel.RIL_SYMBOL;
|
||||||
|
logger.info("PageIteratorLevel: " + Utils.getConstantName(level, TessPageIteratorLevel.class));
|
||||||
|
List<Rectangle> result = instance.getSegmentedRegions(bi, level);
|
||||||
|
for (int i = 0; i < result.size(); i++) {
|
||||||
|
Rectangle rect = result.get(i);
|
||||||
|
logger.info(String.format("Box[%d]: x=%d, y=%d, w=%d, h=%d", i, rect.x, rect.y, rect.width, rect.height));
|
||||||
|
}
|
||||||
|
|
||||||
|
assertTrue(result.size() > 0);
|
||||||
|
}
|
||||||
|
}
|
267
NGCC/Tess4J/test/net/sourceforge/tess4j/TesseractTest.java
Normal file
267
NGCC/Tess4J/test/net/sourceforge/tess4j/TesseractTest.java
Normal file
@ -0,0 +1,267 @@
|
|||||||
|
/**
|
||||||
|
* Copyright @ 2010 Quan Nguyen
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||||
|
* use this file except in compliance with the License. You may obtain a copy of
|
||||||
|
* the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations under
|
||||||
|
* the License.
|
||||||
|
*/
|
||||||
|
package net.sourceforge.tess4j;
|
||||||
|
|
||||||
|
import java.awt.Rectangle;
|
||||||
|
import java.awt.image.BufferedImage;
|
||||||
|
import java.io.File;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
import javax.imageio.IIOImage;
|
||||||
|
import javax.imageio.ImageIO;
|
||||||
|
|
||||||
|
import net.sourceforge.tess4j.util.ImageHelper;
|
||||||
|
import net.sourceforge.tess4j.util.ImageIOHelper;
|
||||||
|
import net.sourceforge.tess4j.util.LoggHelper;
|
||||||
|
import net.sourceforge.tess4j.util.Utils;
|
||||||
|
|
||||||
|
import net.sourceforge.tess4j.ITesseract.RenderedFormat;
|
||||||
|
import net.sourceforge.tess4j.ITessAPI.TessPageIteratorLevel;
|
||||||
|
|
||||||
|
import static org.junit.Assert.*;
|
||||||
|
|
||||||
|
import com.recognition.software.jdeskew.ImageDeskew;
|
||||||
|
|
||||||
|
import org.junit.After;
|
||||||
|
import org.junit.AfterClass;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
public class TesseractTest {
|
||||||
|
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(new LoggHelper().toString());
|
||||||
|
static final double MINIMUM_DESKEW_THRESHOLD = 0.05d;
|
||||||
|
ITesseract instance;
|
||||||
|
|
||||||
|
private final String datapath = ".";
|
||||||
|
private final String testResourcesDataPath = "test/resources/test-data";
|
||||||
|
private final String expOCRResult = "The (quick) [brown] {fox} jumps!\nOver the $43,456.78 <lazy> #90 dog";
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public static void setUpClass() throws Exception {
|
||||||
|
}
|
||||||
|
|
||||||
|
@AfterClass
|
||||||
|
public static void tearDownClass() throws Exception {
|
||||||
|
}
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void setUp() {
|
||||||
|
instance = new Tesseract();
|
||||||
|
instance.setDatapath(new File(datapath).getPath());
|
||||||
|
}
|
||||||
|
|
||||||
|
@After
|
||||||
|
public void tearDown() {
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of doOCR method, of class Tesseract.
|
||||||
|
*
|
||||||
|
* @throws java.lang.Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testDoOCR_File() throws Exception {
|
||||||
|
logger.info("doOCR on a PNG image");
|
||||||
|
File imageFile = new File(testResourcesDataPath, "eurotext.png");
|
||||||
|
String expResult = expOCRResult;
|
||||||
|
String result = instance.doOCR(imageFile);
|
||||||
|
logger.info(result);
|
||||||
|
assertEquals(expResult, result.substring(0, expResult.length()));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of doOCR method, of class Tesseract.
|
||||||
|
*
|
||||||
|
* @throws java.lang.Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testDoOCR_UNLV_Zone_File() throws Exception {
|
||||||
|
logger.info("doOCR on a PNG image with UNLV zone file .uzn");
|
||||||
|
//UNLV zone format: left top width height label
|
||||||
|
File imageFile = new File(testResourcesDataPath, "eurotext_unlv.png");
|
||||||
|
String expResult = "& duck/goose, as 12.5% of E-mail\n\n"
|
||||||
|
+ "from aspammer@website.com is spam.\n\n"
|
||||||
|
+ "The (quick) [brown] {fox} jumps!\n"
|
||||||
|
+ "Over the $43,456.78 <lazy> #90 dog";
|
||||||
|
String result = instance.doOCR(imageFile);
|
||||||
|
logger.info(result);
|
||||||
|
assertEquals(expResult, result.trim());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of doOCR method, of class Tesseract.
|
||||||
|
*
|
||||||
|
* @throws java.lang.Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testDoOCR_File_With_Configs() throws Exception {
|
||||||
|
logger.info("doOCR with configs");
|
||||||
|
File imageFile = new File(testResourcesDataPath, "eurotext.png");
|
||||||
|
String expResult = "[-0123456789.\n ]+";
|
||||||
|
List<String> configs = Arrays.asList("digits");
|
||||||
|
instance.setConfigs(configs);
|
||||||
|
String result = instance.doOCR(imageFile);
|
||||||
|
logger.info(result);
|
||||||
|
assertTrue(result.matches(expResult));
|
||||||
|
instance.setConfigs(null); // since Tesseract instance is a singleton, clear configs so the effects do not carry on into subsequent runs.
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of doOCR method, of class Tesseract.
|
||||||
|
*
|
||||||
|
* @throws java.lang.Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testDoOCR_File_Rectangle() throws Exception {
|
||||||
|
logger.info("doOCR on a BMP image with bounding rectangle");
|
||||||
|
File imageFile = new File(testResourcesDataPath, "eurotext.bmp");
|
||||||
|
Rectangle rect = new Rectangle(0, 0, 1024, 800); // define an equal or smaller region of interest on the image
|
||||||
|
String expResult = expOCRResult;
|
||||||
|
String result = instance.doOCR(imageFile, rect);
|
||||||
|
logger.info(result);
|
||||||
|
assertEquals(expResult, result.substring(0, expResult.length()));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of doOCR method, of class Tesseract.
|
||||||
|
*
|
||||||
|
* @throws java.lang.Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testDoOCR_PDF() throws Exception {
|
||||||
|
logger.info("doOCR on a PDF document");
|
||||||
|
File imageFile = new File(testResourcesDataPath, "eurotext.pdf");
|
||||||
|
List<IIOImage> imageList = ImageIOHelper.getIIOImageList(imageFile);
|
||||||
|
String expResult = expOCRResult;
|
||||||
|
String result = instance.doOCR(imageList, null);
|
||||||
|
logger.info(result);
|
||||||
|
assertEquals(expResult, result.substring(0, expResult.length()));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of doOCR method, of class Tesseract.
|
||||||
|
*
|
||||||
|
* @throws java.lang.Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testDoOCR_BufferedImage() throws Exception {
|
||||||
|
logger.info("doOCR on a buffered image of a PNG");
|
||||||
|
File imageFile = new File(testResourcesDataPath, "eurotext.png");
|
||||||
|
BufferedImage bi = ImageIO.read(imageFile);
|
||||||
|
String expResult = expOCRResult;
|
||||||
|
String result = instance.doOCR(bi);
|
||||||
|
logger.info(result);
|
||||||
|
assertEquals(expResult, result.substring(0, expResult.length()));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of deskew algorithm.
|
||||||
|
*
|
||||||
|
* @throws java.lang.Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testDoOCR_SkewedImage() throws Exception {
|
||||||
|
logger.info("doOCR on a skewed PNG image");
|
||||||
|
File imageFile = new File(testResourcesDataPath, "eurotext_deskew.png");
|
||||||
|
BufferedImage bi = ImageIO.read(imageFile);
|
||||||
|
ImageDeskew id = new ImageDeskew(bi);
|
||||||
|
double imageSkewAngle = id.getSkewAngle(); // determine skew angle
|
||||||
|
if ((imageSkewAngle > MINIMUM_DESKEW_THRESHOLD || imageSkewAngle < -(MINIMUM_DESKEW_THRESHOLD))) {
|
||||||
|
bi = ImageHelper.rotateImage(bi, -imageSkewAngle); // deskew image
|
||||||
|
}
|
||||||
|
|
||||||
|
String expResult = expOCRResult;
|
||||||
|
String result = instance.doOCR(bi);
|
||||||
|
logger.info(result);
|
||||||
|
assertEquals(expResult, result.substring(0, expResult.length()));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of createDocuments method, of class Tesseract.
|
||||||
|
*
|
||||||
|
* @throws java.lang.Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testCreateDocuments() throws Exception {
|
||||||
|
logger.info("createDocuments for multiple images");
|
||||||
|
File imageFile1 = new File(testResourcesDataPath, "eurotext.pdf");
|
||||||
|
File imageFile2 = new File(testResourcesDataPath, "eurotext.png");
|
||||||
|
String outputbase1 = "test/test-results/docrenderer-1";
|
||||||
|
String outputbase2 = "test/test-results/docrenderer-2";
|
||||||
|
List<RenderedFormat> formats = new ArrayList<RenderedFormat>(Arrays.asList(RenderedFormat.HOCR, RenderedFormat.PDF, RenderedFormat.TEXT));
|
||||||
|
instance.createDocuments(new String[]{imageFile1.getPath(), imageFile2.getPath()}, new String[]{outputbase1, outputbase2}, formats);
|
||||||
|
assertTrue(new File(outputbase1 + ".pdf").exists());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of getWords method, of class Tesseract.
|
||||||
|
*
|
||||||
|
* @throws java.lang.Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testGetWords() throws Exception {
|
||||||
|
logger.info("getWords");
|
||||||
|
File imageFile = new File(testResourcesDataPath, "eurotext.tif");
|
||||||
|
|
||||||
|
String expResult = "The (quick) [brown] {fox} jumps!\nOver the $43,456.78 <lazy> #90 dog";
|
||||||
|
String[] expResults = expResult.split("\\s");
|
||||||
|
|
||||||
|
int pageIteratorLevel = TessPageIteratorLevel.RIL_WORD;
|
||||||
|
logger.info("PageIteratorLevel: " + Utils.getConstantName(pageIteratorLevel, TessPageIteratorLevel.class));
|
||||||
|
BufferedImage bi = ImageIO.read(imageFile);
|
||||||
|
List<Word> result = instance.getWords(bi, pageIteratorLevel);
|
||||||
|
|
||||||
|
//print the complete result
|
||||||
|
for (Word word : result) {
|
||||||
|
logger.info(word.toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
List<String> text = new ArrayList<String>();
|
||||||
|
for (Word word : result.subList(0, expResults.length)) {
|
||||||
|
text.add(word.getText());
|
||||||
|
}
|
||||||
|
|
||||||
|
assertArrayEquals(expResults, text.toArray());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of getSegmentedRegions method, of class Tesseract.
|
||||||
|
*
|
||||||
|
* @throws java.lang.Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testGetSegmentedRegions() throws Exception {
|
||||||
|
logger.info("getSegmentedRegions at given TessPageIteratorLevel");
|
||||||
|
File imageFile = new File(testResourcesDataPath, "eurotext.png");
|
||||||
|
BufferedImage bi = ImageIO.read(imageFile);
|
||||||
|
int level = TessPageIteratorLevel.RIL_SYMBOL;
|
||||||
|
logger.info("PageIteratorLevel: " + Utils.getConstantName(level, TessPageIteratorLevel.class));
|
||||||
|
List<Rectangle> result = instance.getSegmentedRegions(bi, level);
|
||||||
|
for (int i = 0; i < result.size(); i++) {
|
||||||
|
Rectangle rect = result.get(i);
|
||||||
|
logger.info(String.format("Box[%d]: x=%d, y=%d, w=%d, h=%d", i, rect.x, rect.y, rect.width, rect.height));
|
||||||
|
}
|
||||||
|
|
||||||
|
assertTrue(result.size() > 0);
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,84 @@
|
|||||||
|
/**
|
||||||
|
* Copyright @ 2008 Quan Nguyen
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||||
|
* use this file except in compliance with the License. You may obtain a copy of
|
||||||
|
* the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations under
|
||||||
|
* the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package net.sourceforge.tess4j;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.net.URISyntaxException;
|
||||||
|
import java.net.URL;
|
||||||
|
|
||||||
|
|
||||||
|
import net.sourceforge.tess4j.util.LoadLibs;
|
||||||
|
|
||||||
|
import net.sourceforge.tess4j.util.LoggHelper;
|
||||||
|
import org.junit.Test;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
public class TestFolderExtraction {
|
||||||
|
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(new LoggHelper().toString());
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testFolderExtraction() {
|
||||||
|
|
||||||
|
File tessDataFolder = null;
|
||||||
|
|
||||||
|
try {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Loads the image from resources.
|
||||||
|
*/
|
||||||
|
String filename = String.format("%s/%s", "/test-data", "eurotext.pdf");
|
||||||
|
URL defaultImage = getClass().getResource(filename);
|
||||||
|
File imageFile = new File(defaultImage.toURI());
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extracts <code>tessdata</code> folder into a temp folder.
|
||||||
|
*/
|
||||||
|
logger.info("Loading the tessdata folder into a temporary folder.");
|
||||||
|
tessDataFolder = LoadLibs.extractTessResources("tessdata");
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets tesseract instance and sets data path.
|
||||||
|
*/
|
||||||
|
ITesseract instance = new Tesseract();
|
||||||
|
|
||||||
|
if (tessDataFolder != null) {
|
||||||
|
logger.info(tessDataFolder.getAbsolutePath());
|
||||||
|
instance.setDatapath(tessDataFolder.getParent());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs OCR on the image.
|
||||||
|
*/
|
||||||
|
String result = instance.doOCR(imageFile);
|
||||||
|
logger.info(result);
|
||||||
|
|
||||||
|
} catch (TesseractException e) {
|
||||||
|
logger.error(e.getMessage());
|
||||||
|
logger.error(e.getMessage(), e);
|
||||||
|
} catch (URISyntaxException e) {
|
||||||
|
logger.error(e.getMessage(), e);
|
||||||
|
}
|
||||||
|
|
||||||
|
// checks if tessdata folder exists
|
||||||
|
assertTrue(tessDataFolder != null && tessDataFolder.exists());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
45
NGCC/Tess4J/test/net/sourceforge/tess4j/Word.java
Normal file
45
NGCC/Tess4J/test/net/sourceforge/tess4j/Word.java
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
package net.sourceforge.tess4j;
|
||||||
|
|
||||||
|
import java.awt.Rectangle;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Encapsulates Tesseract results.
|
||||||
|
*/
|
||||||
|
class Word {
|
||||||
|
|
||||||
|
private final String text;
|
||||||
|
private final float confidence;
|
||||||
|
private final Rectangle rect;
|
||||||
|
|
||||||
|
public Word(String text, float confidence, Rectangle rect) {
|
||||||
|
this.text = text;
|
||||||
|
this.confidence = confidence;
|
||||||
|
this.rect = rect;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the text
|
||||||
|
*/
|
||||||
|
public String getText() {
|
||||||
|
return text;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the confidence
|
||||||
|
*/
|
||||||
|
public float getConfidence() {
|
||||||
|
return confidence;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the bounding box
|
||||||
|
*/
|
||||||
|
public Rectangle getRect() {
|
||||||
|
return rect;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return String.format("%s\t[Confidence: %f Bounding box: %d %d %d %d]", text, confidence, rect.x, rect.y, rect.width, rect.height);
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,128 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2014 Quan Nguyen.
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package net.sourceforge.tess4j.util;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
import org.junit.After;
|
||||||
|
import org.junit.AfterClass;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
import org.junit.Test;
|
||||||
|
import static org.junit.Assert.*;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
public class PdfUtilitiesTest {
|
||||||
|
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(new LoggHelper().toString());
|
||||||
|
private final String TEST_RESOURCES_DATA_PATH = "test/resources/test-data";
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public static void setUpClass() {
|
||||||
|
}
|
||||||
|
|
||||||
|
@AfterClass
|
||||||
|
public static void tearDownClass() {
|
||||||
|
}
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void setUp() {
|
||||||
|
System.setProperty(PdfUtilities.PDF_LIBRARY, PdfUtilities.PDFBOX); // Note: comment out to test Ghostscript
|
||||||
|
}
|
||||||
|
|
||||||
|
@After
|
||||||
|
public void tearDown() {
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of convertPdf2Tiff method, of class PdfUtilities.
|
||||||
|
*
|
||||||
|
* @throws java.lang.Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testConvertPdf2Tiff() throws Exception {
|
||||||
|
logger.info("convertPdf2Tiff");
|
||||||
|
File inputPdfFile = new File(TEST_RESOURCES_DATA_PATH, "eurotext.pdf");
|
||||||
|
File result = PdfUtilities.convertPdf2Tiff(inputPdfFile);
|
||||||
|
result.deleteOnExit();
|
||||||
|
assertTrue(result.exists());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of convertPdf2Png method, of class PdfUtilities.
|
||||||
|
*
|
||||||
|
* @throws java.io.IOException
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testConvertPdf2Png() throws IOException {
|
||||||
|
logger.info("convertPdf2Png");
|
||||||
|
File inputPdfFile = new File(TEST_RESOURCES_DATA_PATH, "eurotext.pdf");
|
||||||
|
File[] results = PdfUtilities.convertPdf2Png(inputPdfFile);
|
||||||
|
assertTrue(results.length > 0);
|
||||||
|
|
||||||
|
//clean up
|
||||||
|
File parentDir = results[0].getParentFile();
|
||||||
|
for (File result : results) {
|
||||||
|
result.delete();
|
||||||
|
}
|
||||||
|
parentDir.delete();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of splitPdf method, of class PdfUtilities.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testSplitPdf() {
|
||||||
|
logger.info("splitPdf");
|
||||||
|
File inputPdfFile = new File(TEST_RESOURCES_DATA_PATH, "multipage-pdf.pdf");
|
||||||
|
File outputPdfFile = new File("test/test-results/multipage-pdf_splitted.pdf");
|
||||||
|
int startPage = 2;
|
||||||
|
int endPage = 3;
|
||||||
|
int expResult = 2;
|
||||||
|
PdfUtilities.splitPdf(inputPdfFile, outputPdfFile, startPage, endPage);
|
||||||
|
int pageCount = PdfUtilities.getPdfPageCount(outputPdfFile);
|
||||||
|
assertEquals(expResult, pageCount);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of getPdfPageCount method, of class PdfUtilities.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testGetPdfPageCount() {
|
||||||
|
logger.info("getPdfPageCount");
|
||||||
|
File inputPdfFile = new File(TEST_RESOURCES_DATA_PATH, "multipage-pdf.pdf");
|
||||||
|
int expResult = 5;
|
||||||
|
int result = PdfUtilities.getPdfPageCount(inputPdfFile);
|
||||||
|
assertEquals(expResult, result);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of mergePdf method, of class PdfUtilities.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testMergePdf() {
|
||||||
|
logger.info("mergePdf");
|
||||||
|
File pdfPartOne = new File(TEST_RESOURCES_DATA_PATH, "eurotext.pdf");
|
||||||
|
File pdfPartTwo = new File(TEST_RESOURCES_DATA_PATH, "multipage-pdf.pdf");
|
||||||
|
int expResult = 6;
|
||||||
|
File outputPdfFile = new File("test/test-results", "multipage-pdf_merged.pdf");
|
||||||
|
File[] inputPdfFiles = {pdfPartOne, pdfPartTwo};
|
||||||
|
PdfUtilities.mergePdf(inputPdfFiles, outputPdfFile);
|
||||||
|
assertEquals(expResult, PdfUtilities.getPdfPageCount(outputPdfFile));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
BIN
NGCC/Tess4J/test/resources/test-data/eurotext.bmp
Normal file
BIN
NGCC/Tess4J/test/resources/test-data/eurotext.bmp
Normal file
Binary file not shown.
After Width: | Height: | Size: 100 KiB |
BIN
NGCC/Tess4J/test/resources/test-data/eurotext.pdf
Normal file
BIN
NGCC/Tess4J/test/resources/test-data/eurotext.pdf
Normal file
Binary file not shown.
BIN
NGCC/Tess4J/test/resources/test-data/eurotext.png
Normal file
BIN
NGCC/Tess4J/test/resources/test-data/eurotext.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 14 KiB |
BIN
NGCC/Tess4J/test/resources/test-data/eurotext.tif
Normal file
BIN
NGCC/Tess4J/test/resources/test-data/eurotext.tif
Normal file
Binary file not shown.
BIN
NGCC/Tess4J/test/resources/test-data/eurotext_deskew.png
Normal file
BIN
NGCC/Tess4J/test/resources/test-data/eurotext_deskew.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 200 KiB |
BIN
NGCC/Tess4J/test/resources/test-data/eurotext_unlv.png
Normal file
BIN
NGCC/Tess4J/test/resources/test-data/eurotext_unlv.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 14 KiB |
3
NGCC/Tess4J/test/resources/test-data/eurotext_unlv.uzn
Normal file
3
NGCC/Tess4J/test/resources/test-data/eurotext_unlv.uzn
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
97 162 747 50 ThirdLine
|
||||||
|
97 209 828 55 FourthLine
|
||||||
|
92 56 810 107 First2Lines
|
BIN
NGCC/Tess4J/test/resources/test-data/multipage-pdf.pdf
Normal file
BIN
NGCC/Tess4J/test/resources/test-data/multipage-pdf.pdf
Normal file
Binary file not shown.
137
NGCC/Tess4J/versionchanges.txt
Normal file
137
NGCC/Tess4J/versionchanges.txt
Normal file
@ -0,0 +1,137 @@
|
|||||||
|
Tess4J Change Summary
|
||||||
|
|
||||||
|
Version 0.1 - initial release (14 Aug 2010):
|
||||||
|
- Java JNA-based wrapper for Tesseract OCR DLL 2.04
|
||||||
|
- Support uncompressed, binary TIFF images
|
||||||
|
|
||||||
|
Version 0.2 (16 Aug 2010):
|
||||||
|
- Add support for more image formats (PNG, BMP, GIF, PDF, JPEG)
|
||||||
|
- Add support for compressed, grayscale and colored images
|
||||||
|
|
||||||
|
Version 0.3 (22 Aug 2010):
|
||||||
|
- Include API support for BufferedImage
|
||||||
|
- Clean up code. Remove unsupported APIs and files
|
||||||
|
- Document the API
|
||||||
|
|
||||||
|
Version 0.3.1 (26 Aug 2010):
|
||||||
|
- Send only pixel data, not the whole image data, to the Tesseract engine, to fix a bug that erroneously moved some words from the beginning of a line towards the end of the line
|
||||||
|
|
||||||
|
Version 0.4 (1 Nov 2010):
|
||||||
|
- Add JNA Direct Mapping calls, which can provide performance near that of custom JNI
|
||||||
|
|
||||||
|
Version 1.0 (30 October 2012):
|
||||||
|
- Upgrade to Tesseract 3.02 (r798), which is not backward compatible with Tesseract 2.04.
|
||||||
|
- Implement a new JNA wrapper for the new Tesseract OCR API
|
||||||
|
- Add more unit test cases
|
||||||
|
- Update documentation
|
||||||
|
|
||||||
|
Version 1.1 (3 March 2013)
|
||||||
|
- Update Tesseract DLL to r828
|
||||||
|
- Additional API methods, image helper methods, and unit test cases
|
||||||
|
- Improve handling of Unicode character encoding
|
||||||
|
- Fix memory leaks
|
||||||
|
- Add support for determining skew angle and image rotation
|
||||||
|
|
||||||
|
Version 1.2 (22 September 2013)
|
||||||
|
- Update Tesseract DLL to r866
|
||||||
|
- More efficient OCR of multiple images
|
||||||
|
- Various minor improvements
|
||||||
|
- Update JNA to v4.0
|
||||||
|
|
||||||
|
Version 1.3 (31 May 2014)
|
||||||
|
- Update JNA to v4.1.0
|
||||||
|
- Update Ghost4J to v0.5.1
|
||||||
|
- Refactoring
|
||||||
|
- Bundle Tesseract and Leptonica 64-bit DLLs
|
||||||
|
|
||||||
|
Version 1.4 (18 January 2015)
|
||||||
|
- Refactor to reduce code duplication
|
||||||
|
- Embed Windows native resources in JAR
|
||||||
|
- Autoload Windows native libraries
|
||||||
|
|
||||||
|
Version 1.4.1 (24 January 2015)
|
||||||
|
- Enable use of jna.library.path system property for user-customizable path
|
||||||
|
|
||||||
|
Version 1.5 (13 March 2015)
|
||||||
|
- Add UNLV zone file support
|
||||||
|
- Refactor
|
||||||
|
|
||||||
|
Version 2.0 (29 March 2015)
|
||||||
|
- Upgrade to Tesseract 3.03 (r1050), which is compatible with Tesseract 3.03RC on Linux
|
||||||
|
- Refactor Tesseract class for extensibility and thread-safety
|
||||||
|
- Update English language data for Tesseract 3.02
|
||||||
|
|
||||||
|
Version 3.0 (25 December 2015)
|
||||||
|
- Upgrade to Tesseract 3.04 (953523b)
|
||||||
|
- Include Lept4J library
|
||||||
|
- Incorporate slf4j and logback libraries for logging
|
||||||
|
- Make GhostScript calls thread safe
|
||||||
|
|
||||||
|
Version 3.1 (21 March 2016)
|
||||||
|
- Update Tesseract to 3.04.01 (4ef68a0)
|
||||||
|
- Use Lept4J-1.1.2 (Leptonica 1.72)
|
||||||
|
- Update JNA to 4.2.2
|
||||||
|
- Update Ghost4J to 1.0.1
|
||||||
|
- Delete ResultRenderer after use to release PDF file handler
|
||||||
|
|
||||||
|
Version 3.2 (15 May 2016)
|
||||||
|
- Revert JNA to 4.1.0 due to "Invalid calling convention 63" errors invoking GhostScript via Ghost4J on Linux
|
||||||
|
- Update Lept4J to 1.2.2 (Leptonica 1.73)
|
||||||
|
- Recompile Tesseract 3.04.01 DLL against Leptonica 1.73
|
||||||
|
- Update GhostScript Windows binary to 9.19
|
||||||
|
|
||||||
|
Version 3.2.1 (29 May 2016)
|
||||||
|
- Properly release Box and Boxa resources
|
||||||
|
- Update Lept4J to 1.2.3
|
||||||
|
|
||||||
|
Version 3.2.2 (16 February 2017)
|
||||||
|
- Update GhostScript to 9.20
|
||||||
|
- Fix possible NPE in PDF-related code
|
||||||
|
- Update dependencies
|
||||||
|
- Additional image utility methods
|
||||||
|
|
||||||
|
Version 3.3.0 (16 February 2017)
|
||||||
|
- Upgrade to Tesseract 3.05 (2ca5d0a)
|
||||||
|
- Update Lept4J to 1.3.0 (Leptonica 1.74.1)
|
||||||
|
|
||||||
|
Version 3.3.1 (23 March 2017)
|
||||||
|
- Update Lept4J to 1.3.1
|
||||||
|
- Update other dependencies
|
||||||
|
|
||||||
|
Version 3.4.0 (1 June 2017)
|
||||||
|
- Upgrade to Tesseract 3.05.01 (2158661)
|
||||||
|
- Update Lept4J to 1.4.0
|
||||||
|
- Add support for jboss-vfs protocol
|
||||||
|
|
||||||
|
Version 3.4.1 (22 September 2017)
|
||||||
|
- Do not extract/copy a native resource if it already exists and has the same file size
|
||||||
|
- Update Tesseract 3.05.01 (e2e79c4); link against Leptonica 1.74.4
|
||||||
|
- Update Lept4J to 1.6.1
|
||||||
|
|
||||||
|
Version 3.4.2 (14 November 2017)
|
||||||
|
- Update Lept4J to 1.6.2
|
||||||
|
- Update GhostScript to 9.22
|
||||||
|
- Improve handling of PDF files in multi-threaded environment
|
||||||
|
- Lift limits on number of pages in PDF
|
||||||
|
- Use TESSDATA_PREFIX environment variable by default, if defined
|
||||||
|
|
||||||
|
Version 3.4.3 (14 January 2018)
|
||||||
|
- Do not extract/copy a resource if it already exists and has the same file size
|
||||||
|
|
||||||
|
Version 3.4.4 (22 February 2018)
|
||||||
|
- Exclude logback.xml from JAR
|
||||||
|
- Add image rotate and deskew methods
|
||||||
|
- Update Lept4J to 1.6.3
|
||||||
|
|
||||||
|
Version 3.4.5 (21 March 2018)
|
||||||
|
- Remove GS DLL due to license incompatibility
|
||||||
|
- Use PDFBox
|
||||||
|
|
||||||
|
Version 3.4.6 (25 March 2018)
|
||||||
|
- Update PDFBox dependencies
|
||||||
|
|
||||||
|
Version 3.4.7 (16 April 2018)
|
||||||
|
- Update jai-imageio-core to 1.4.0 for Java 9 fixes
|
||||||
|
|
||||||
|
Version 3.4.8 (2 May 2018)
|
||||||
|
- Fix a path issue when extracting resources from JAR to temp directory on Windows server
|
BIN
NGCC/jar/commons-beanutils-1.9.2.jar
Normal file
BIN
NGCC/jar/commons-beanutils-1.9.2.jar
Normal file
Binary file not shown.
BIN
NGCC/jar/commons-io-2.6.jar
Normal file
BIN
NGCC/jar/commons-io-2.6.jar
Normal file
Binary file not shown.
BIN
NGCC/jar/fontbox-2.0.9.jar
Normal file
BIN
NGCC/jar/fontbox-2.0.9.jar
Normal file
Binary file not shown.
BIN
NGCC/jar/ghost4j-1.0.1.jar
Normal file
BIN
NGCC/jar/ghost4j-1.0.1.jar
Normal file
Binary file not shown.
BIN
NGCC/jar/hamcrest-core-1.3.jar
Normal file
BIN
NGCC/jar/hamcrest-core-1.3.jar
Normal file
Binary file not shown.
BIN
NGCC/jar/itext-2.1.7.jar
Normal file
BIN
NGCC/jar/itext-2.1.7.jar
Normal file
Binary file not shown.
BIN
NGCC/jar/jai-imageio-core-1.4.0.jar
Normal file
BIN
NGCC/jar/jai-imageio-core-1.4.0.jar
Normal file
Binary file not shown.
BIN
NGCC/jar/jbig2-imageio-3.0.0.jar
Normal file
BIN
NGCC/jar/jbig2-imageio-3.0.0.jar
Normal file
Binary file not shown.
BIN
NGCC/jar/jboss-vfs-3.2.12.Final.jar
Normal file
BIN
NGCC/jar/jboss-vfs-3.2.12.Final.jar
Normal file
Binary file not shown.
BIN
NGCC/jar/jcl-over-slf4j-1.7.25.jar
Normal file
BIN
NGCC/jar/jcl-over-slf4j-1.7.25.jar
Normal file
Binary file not shown.
BIN
NGCC/jar/jna-4.1.0.jar
Normal file
BIN
NGCC/jar/jna-4.1.0.jar
Normal file
Binary file not shown.
BIN
NGCC/jar/jul-to-slf4j-1.7.25.jar
Normal file
BIN
NGCC/jar/jul-to-slf4j-1.7.25.jar
Normal file
Binary file not shown.
BIN
NGCC/jar/junit-4.12.jar
Normal file
BIN
NGCC/jar/junit-4.12.jar
Normal file
Binary file not shown.
BIN
NGCC/jar/lept4j-1.6.4.jar
Normal file
BIN
NGCC/jar/lept4j-1.6.4.jar
Normal file
Binary file not shown.
BIN
NGCC/jar/log4j-1.2.17.jar
Normal file
BIN
NGCC/jar/log4j-1.2.17.jar
Normal file
Binary file not shown.
BIN
NGCC/jar/log4j-over-slf4j-1.7.25.jar
Normal file
BIN
NGCC/jar/log4j-over-slf4j-1.7.25.jar
Normal file
Binary file not shown.
BIN
NGCC/jar/logback-classic-1.2.3.jar
Normal file
BIN
NGCC/jar/logback-classic-1.2.3.jar
Normal file
Binary file not shown.
BIN
NGCC/jar/logback-core-1.2.3.jar
Normal file
BIN
NGCC/jar/logback-core-1.2.3.jar
Normal file
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user