Update arborescence 2

2019-10-10 18:01:08 +02:00
parent 29dba04efb
commit a0b8e315db
167 changed files with 3 additions and 5 deletions
--- a/Tess4J/src/com/recognition/software/jdeskew/ImageDeskew.java
+++ b/Tess4J/src/com/recognition/software/jdeskew/ImageDeskew.java
@@ -0,0 +1,175 @@
+/**
+ * <a href="http://www.jdeskew.com/">JDeskew</a>
+ */
+package com.recognition.software.jdeskew;
+
+import java.awt.image.BufferedImage;
+
+public class ImageDeskew {
+
+    /**
+     * Representation of a line in the image.
+     */
+    public class HoughLine {
+
+        // count of points in the line
+        public int count = 0;
+        // index in matrix.
+        public int index = 0;
+        // the line is represented as all x, y that solve y * cos(alpha) - x *
+        // sin(alpha) = d
+        public double alpha;
+        public double d;
+    }
+
+    // number of strongest accumulator cells requested from getTop
+    private static final int TOP_LINE_COUNT = 20;
+    // number of those lines that take part in the average.
+    // NOTE(review): 19 rather than TOP_LINE_COUNT looks like an off-by-one, but
+    // it is kept so that results for populated images do not change.
+    private static final int AVERAGED_LINE_COUNT = 19;
+
+    // the source image
+    private BufferedImage cImage;
+    // the range of angles (in degrees) to search for lines
+    private double cAlphaStart = -20;
+    private double cAlphaStep = 0.2;
+    private int cSteps = 40 * 5;
+    // pre-calculation of sin and cos
+    private double[] cSinA;
+    private double[] cCosA;
+    // range of d
+    private double cDMin;
+    private double cDStep = 1.0;
+    private int cDCount;
+    // count of points that fit in a line
+    private int[] cHMatrix;
+
+    /**
+     * Constructor.
+     *
+     * @param image the image whose skew is to be measured
+     */
+    public ImageDeskew(BufferedImage image) {
+        this.cImage = image;
+    }
+
+    /**
+     * Calculates the skew angle of the image cImage.
+     * <p>
+     * Runs the Hough transformation, takes the strongest detected lines and
+     * averages their angles. Placeholder entries that received no votes are
+     * left out of the average, so an image without any detectable edge yields
+     * <code>0.0</code> instead of the start of the search range.
+     *
+     * @return the average angle of the detected lines in degrees, or
+     * <code>0.0</code> when no line was detected
+     */
+    public double getSkewAngle() {
+        // perform Hough Transformation
+        calc();
+        // strongest detected lines in the image
+        ImageDeskew.HoughLine[] hl = getTop(TOP_LINE_COUNT);
+
+        double sum = 0.0;
+        int count = 0;
+        for (int i = 0; i < AVERAGED_LINE_COUNT && i < hl.length; i++) {
+            if (hl[i].count == 0) {
+                // never filled by getTop: its alpha is only the default for
+                // matrix index 0 and must not bias the average
+                continue;
+            }
+            sum += hl[i].alpha;
+            count++;
+        }
+
+        return (count == 0) ? 0.0d : (sum / count);
+    }
+
+    /**
+     * Finds the <code>count</code> accumulator cells with the most points.
+     *
+     * @param count number of lines to return
+     * @return array of length <code>count</code>, ordered by descending point
+     * count; entries that could not be filled keep a point count of 0
+     */
+    private ImageDeskew.HoughLine[] getTop(int count) {
+
+        ImageDeskew.HoughLine[] hl = new ImageDeskew.HoughLine[count];
+        for (int i = 0; i < count; i++) {
+            hl[i] = new ImageDeskew.HoughLine();
+        }
+
+        ImageDeskew.HoughLine tmp;
+
+        for (int i = 0; i < (this.cHMatrix.length - 1); i++) {
+            if (this.cHMatrix[i] > hl[count - 1].count) {
+                // overwrite the weakest entry, then move it up to its place
+                hl[count - 1].count = this.cHMatrix[i];
+                hl[count - 1].index = i;
+                int j = count - 1;
+                while ((j > 0) && (hl[j].count > hl[j - 1].count)) {
+                    tmp = hl[j];
+                    hl[j] = hl[j - 1];
+                    hl[j - 1] = tmp;
+                    j--;
+                }
+            }
+        }
+
+        int alphaIndex;
+        int dIndex;
+
+        // translate the flat matrix index back into (d, alpha)
+        for (int i = 0; i < count; i++) {
+            dIndex = hl[i].index / cSteps; // integer division, no
+            // remainder
+            alphaIndex = hl[i].index - dIndex * cSteps;
+            hl[i].alpha = getAlpha(alphaIndex);
+            hl[i].d = dIndex + cDMin;
+        }
+
+        return hl;
+    }
+
+    /**
+     * Hough Transformation over the rows between one quarter and three
+     * quarters of the image height.
+     */
+    private void calc() {
+        int hMin = (int) ((this.cImage.getHeight()) / 4.0);
+        int hMax = (int) ((this.cImage.getHeight()) * 3.0 / 4.0);
+        int xEnd = this.cImage.getWidth() - 2;
+        init();
+
+        for (int y = hMin; y < hMax; y++) {
+            for (int x = 1; x < xEnd; x++) {
+                // only lower edges are considered: a black pixel with a
+                // non-black pixel directly below it
+                if (ImageUtil.isBlack(this.cImage, x, y)
+                        && !ImageUtil.isBlack(this.cImage, x, y + 1)) {
+                    calc(x, y);
+                }
+            }
+        }
+
+    }
+
+    /**
+     * Votes for all lines through the point (x,y).
+     *
+     * @param x x-coordinate of the edge point
+     * @param y y-coordinate of the edge point
+     */
+    private void calc(int x, int y) {
+        double d;
+        int dIndex;
+        int index;
+
+        for (int alpha = 0; alpha < (this.cSteps - 1); alpha++) {
+            d = y * this.cCosA[alpha] - x * this.cSinA[alpha];
+            dIndex = (int) (d - this.cDMin);
+            index = dIndex * this.cSteps + alpha;
+            // skip votes that fall outside the accumulator instead of relying
+            // on a caught exception and console output
+            if (index >= 0 && index < this.cHMatrix.length) {
+                this.cHMatrix[index] += 1;
+            }
+        }
+    }
+
+    /**
+     * Pre-calculates the sin/cos tables and allocates a cleared accumulator
+     * matrix sized from the image dimensions.
+     */
+    private void init() {
+
+        double angle;
+
+        // pre-calculation of sin and cos
+        this.cSinA = new double[this.cSteps - 1];
+        this.cCosA = new double[this.cSteps - 1];
+
+        for (int i = 0; i < (this.cSteps - 1); i++) {
+            angle = getAlpha(i) * Math.PI / 180.0;
+            this.cSinA[i] = Math.sin(angle);
+            this.cCosA[i] = Math.cos(angle);
+        }
+
+        // range of d
+        this.cDMin = -this.cImage.getWidth();
+        this.cDCount = (int) (2.0 * ((this.cImage.getWidth() + this.cImage.getHeight())) / this.cDStep);
+        this.cHMatrix = new int[this.cDCount * this.cSteps];
+    }
+
+    /**
+     * Converts an angle step index into the angle it stands for.
+     *
+     * @param index angle step index
+     * @return the angle in degrees
+     */
+    public double getAlpha(int index) {
+        return this.cAlphaStart + (index * this.cAlphaStep);
+    }
+}
--- a/Tess4J/src/com/recognition/software/jdeskew/ImageUtil.java
+++ b/Tess4J/src/com/recognition/software/jdeskew/ImageUtil.java
@@ -0,0 +1,132 @@
+/**
+ * <a href="http://www.jdeskew.com/">JDeskew</a>
+ */
+package com.recognition.software.jdeskew;
+
+import java.awt.Color;
+import java.awt.Graphics2D;
+import java.awt.RenderingHints;
+import java.awt.geom.AffineTransform;
+import java.awt.image.BufferedImage;
+import java.awt.image.WritableRaster;
+
+public class ImageUtil {
+
+    /**
+     * Whether the pixel is black.
+     * 
+     * @param image source image
+     * @param x
+     * @param y
+     * @return 
+     */
+    public static boolean isBlack(BufferedImage image, int x, int y) {
+        if (image.getType() == BufferedImage.TYPE_BYTE_BINARY) {
+            WritableRaster raster = image.getRaster();
+            int pixelRGBValue = raster.getSample(x, y, 0);
+            return pixelRGBValue == 0;
+        }
+
+        int luminanceValue = 140;
+        return isBlack(image, x, y, luminanceValue);
+    }
+
+    /**
+     * Whether the pixel is black.
+     * 
+     * @param image source image
+     * @param x
+     * @param y
+     * @param luminanceCutOff
+     * @return 
+     */
+    public static boolean isBlack(BufferedImage image, int x, int y, int luminanceCutOff) {
+        int pixelRGBValue;
+        int r;
+        int g;
+        int b;
+        double luminance = 0.0;
+
+        // return white on areas outside of image boundaries
+        if (x < 0 || y < 0 || x > image.getWidth() || y > image.getHeight()) {
+            return false;
+        }
+
+        try {
+            pixelRGBValue = image.getRGB(x, y);
+            r = (pixelRGBValue >> 16) & 0xff;
+            g = (pixelRGBValue >> 8) & 0xff;
+            b = (pixelRGBValue) & 0xff;
+            luminance = (r * 0.299) + (g * 0.587) + (b * 0.114);
+        } catch (Exception e) {
+            // ignore.
+        }
+
+        return luminance < luminanceCutOff;
+    }
+
+    /**
+     * Rotates image.
+     * 
+     * @param image source image
+     * @param angle by degrees
+     * @param cx x-coordinate of pivot point
+     * @param cy y-coordinate of pivot point
+     * @return rotated image
+     */
+    public static BufferedImage rotate(BufferedImage image, double angle, int cx, int cy) {
+        int width = image.getWidth(null);
+        int height = image.getHeight(null);
+
+        int minX, minY, maxX, maxY;
+        minX = minY = maxX = maxY = 0;
+
+        int[] corners = {0, 0, width, 0, width, height, 0, height};
+
+        double theta = Math.toRadians(angle);
+        for (int i = 0; i < corners.length; i += 2) {
+            int x = (int) (Math.cos(theta) * (corners[i] - cx)
+                    - Math.sin(theta) * (corners[i + 1] - cy) + cx);
+            int y = (int) (Math.sin(theta) * (corners[i] - cx)
+                    + Math.cos(theta) * (corners[i + 1] - cy) + cy);
+
+            if (x > maxX) {
+                maxX = x;
+            }
+
+            if (x < minX) {
+                minX = x;
+            }
+
+            if (y > maxY) {
+                maxY = y;
+            }
+
+            if (y < minY) {
+                minY = y;
+            }
+
+        }
+
+        cx = (cx - minX);
+        cy = (cy - minY);
+
+        BufferedImage bi = new BufferedImage((maxX - minX), (maxY - minY),
+                image.getType());
+        Graphics2D g2 = bi.createGraphics();
+        g2.setRenderingHint(RenderingHints.KEY_INTERPOLATION,
+                RenderingHints.VALUE_INTERPOLATION_BICUBIC);
+
+        g2.setBackground(Color.white);
+        g2.fillRect(0, 0, bi.getWidth(), bi.getHeight());
+
+        AffineTransform at = new AffineTransform();
+        at.rotate(theta, cx, cy);
+
+        g2.setTransform(at);
+        g2.drawImage(image, -minX, -minY, null);
+        g2.dispose();
+
+        return bi;
+    }
+}
--- a/Tess4J/src/net/sourceforge/tess4j/ITessAPI.java
+++ b/Tess4J/src/net/sourceforge/tess4j/ITessAPI.java
@@ -0,0 +1,617 @@
+/**
+ * Copyright @ 2014 Quan Nguyen
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package net.sourceforge.tess4j;
+
+import com.sun.jna.Callback;
+import com.sun.jna.NativeLong;
+import com.sun.jna.Pointer;
+import com.sun.jna.PointerType;
+import com.sun.jna.Structure;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * An interface represents common TessAPI classes/constants.
+ */
+public interface ITessAPI {
+
+    /**
+     * OCR engine selection. On initialization Tesseract/Cube can instantiate,
+     * load and run only the Tesseract part, only the Cube part, or both along
+     * with the combiner. The preferred engine is stored in
+     * <code>tessedit_ocr_engine_mode</code>.<br>
+     * <br>
+     * ATTENTION: When modifying this enum, please make sure to make the
+     * appropriate changes to all the enums mirroring it (e.g. OCREngine in
+     * cityblock/workflow/detection/detection_storage.proto). Such enums will
+     * mention the connection to OcrEngineMode in the comments.
+     */
+    public static interface TessOcrEngineMode {
+
+        /** Run Tesseract only - fastest. */
+        int OEM_TESSERACT_ONLY = 0;
+
+        /** Run Cube only - better accuracy, but slower. */
+        int OEM_CUBE_ONLY = 1;
+
+        /** Run both and combine results - best accuracy. */
+        int OEM_TESSERACT_CUBE_COMBINED = 2;
+
+        /**
+         * Passed to <code>init_*()</code> to request that one of the modes
+         * above be inferred automatically from the variables in the
+         * language-specific config or the command-line configs; when none of
+         * them specifies a mode, the default
+         * <code>OEM_TESSERACT_ONLY</code> is used.
+         */
+        int OEM_DEFAULT = 3;
+    }
+
+    /**
+     * Page layout analysis modes. The values *must* stay ordered by decreasing
+     * amount of layout analysis to be done, except for <code>OSD_ONLY</code>,
+     * so that the inequality test macros below work.
+     */
+    public static interface TessPageSegMode {
+
+        /** Orientation and script detection only. */
+        int PSM_OSD_ONLY = 0;
+
+        /** Automatic page segmentation with orientation and script detection (OSD). */
+        int PSM_AUTO_OSD = 1;
+
+        /** Automatic page segmentation, but no OSD, or OCR. */
+        int PSM_AUTO_ONLY = 2;
+
+        /** Fully automatic page segmentation, but no OSD. */
+        int PSM_AUTO = 3;
+
+        /** Assume a single column of text of variable sizes. */
+        int PSM_SINGLE_COLUMN = 4;
+
+        /** Assume a single uniform block of vertically aligned text. */
+        int PSM_SINGLE_BLOCK_VERT_TEXT = 5;
+
+        /** Assume a single uniform block of text. */
+        int PSM_SINGLE_BLOCK = 6;
+
+        /** Treat the image as a single text line. */
+        int PSM_SINGLE_LINE = 7;
+
+        /** Treat the image as a single word. */
+        int PSM_SINGLE_WORD = 8;
+
+        /** Treat the image as a single word in a circle. */
+        int PSM_CIRCLE_WORD = 9;
+
+        /** Treat the image as a single character. */
+        int PSM_SINGLE_CHAR = 10;
+
+        /** Find as much text as possible in no particular order. */
+        int PSM_SPARSE_TEXT = 11;
+
+        /** Sparse text with orientation and script detection. */
+        int PSM_SPARSE_TEXT_OSD = 12;
+
+        /** Number of enum entries. */
+        int PSM_COUNT = 13;
+    }
+
+    /**
+     * Levels of the page hierarchy. <code>ResultIterator</code> takes one of
+     * these values so that its functions can operate on each level without
+     * having to have 5x as many functions.
+     */
+    public static interface TessPageIteratorLevel {
+
+        /** Block of text/image/separator line. */
+        int RIL_BLOCK = 0;
+
+        /** Paragraph within a block. */
+        int RIL_PARA = 1;
+
+        /** Line within a paragraph. */
+        int RIL_TEXTLINE = 2;
+
+        /** Word within a textline. */
+        int RIL_WORD = 3;
+
+        /** Symbol/character within a word. */
+        int RIL_SYMBOL = 4;
+    }
+
+    /**
+     * Types a POLY_BLOCK or ColPartition can have; used extensively by
+     * ColPartition and POLY_BLOCK. Must be kept in sync with
+     * <code>kPBColors</code> in polyblk.cpp and <code>PTIs*Type</code>
+     * functions below, as well as <code>kPolyBlockNames</code> in
+     * publictypes.cpp.
+     */
+    public static interface TessPolyBlockType {
+
+        /** Type is not yet known. Keep as the first element. */
+        int PT_UNKNOWN = 0;
+
+        /** Text that lives inside a column. */
+        int PT_FLOWING_TEXT = 1;
+
+        /** Text that spans more than one column. */
+        int PT_HEADING_TEXT = 2;
+
+        /** Text that is in a cross-column pull-out region. */
+        int PT_PULLOUT_TEXT = 3;
+
+        /** Partition belonging to an equation region. */
+        int PT_EQUATION = 4;
+
+        /** Partition has inline equation. */
+        int PT_INLINE_EQUATION = 5;
+
+        /** Partition belonging to a table region. */
+        int PT_TABLE = 6;
+
+        /** Text-line runs vertically. */
+        int PT_VERTICAL_TEXT = 7;
+
+        /** Text that belongs to an image. */
+        int PT_CAPTION_TEXT = 8;
+
+        /** Image that lives inside a column. */
+        int PT_FLOWING_IMAGE = 9;
+
+        /** Image that spans more than one column. */
+        int PT_HEADING_IMAGE = 10;
+
+        /** Image that is in a cross-column pull-out region. */
+        int PT_PULLOUT_IMAGE = 11;
+
+        /** Horizontal Line. */
+        int PT_HORZ_LINE = 12;
+
+        /** Vertical Line. */
+        int PT_VERT_LINE = 13;
+
+        /** Lies outside of any column. */
+        int PT_NOISE = 14;
+
+        /** Number of enum entries. */
+        int PT_COUNT = 15;
+    }
+
+    /**
+     * Paragraph justification values.<br>
+     * <br>
+     * NOTA BENE: Fully justified paragraphs (text aligned to both left and
+     * right margins) are marked by Tesseract with JUSTIFICATION_LEFT if their
+     * text is written with a left-to-right script and with JUSTIFICATION_RIGHT
+     * if their text is written in a right-to-left script.<br>
+     * <br>
+     * Interpretation for text read in vertical lines: "Left" is wherever the
+     * starting reading position is.
+     */
+    public static interface TessParagraphJustification {
+
+        /**
+         * The alignment is not clearly one of the other options. This could
+         * happen for example if there are only one or two lines of text or the
+         * text looks like source code or poetry.
+         */
+        int JUSTIFICATION_UNKNOWN = 0;
+
+        /** Each line, except possibly the first, is flush to the same left tab stop. */
+        int JUSTIFICATION_LEFT = 1;
+
+        /**
+         * The text lines of the paragraph are centered about a line going down
+         * through their middle of the text lines.
+         */
+        int JUSTIFICATION_CENTER = 2;
+
+        /** Each line, except possibly the first, is flush to the same right tab stop. */
+        int JUSTIFICATION_RIGHT = 3;
+    }
+
+    /**
+     * Page orientation values. The values of this enum match the convention of
+     * Tesseract's osdetect.h.<br>
+     * <br>
+     * Orientation Example:
+     * <pre>
+     *  +------------------+
+     *  | 1 Aaaa Aaaa Aaaa |
+     *  | Aaa aa aaa aa    |
+     *  | aaaaaa A aa aaa. |
+     *  |                2 |
+     *  |   #######  c c C |
+     *  |   #######  c c c |
+     *  | &lt; #######  c c c |
+     *  | &lt; #######  c   c |
+     *  | &lt; #######  .   c |
+     *  | 3 #######      c |
+     *  +------------------+
+     * </pre>
+     * Above is a diagram of some (1) English and (2) Chinese text and a (3)
+     * photo credit.<br>
+     * <br>
+     * Upright Latin characters are represented as A and a. '&lt;' represents a
+     * latin character rotated anti-clockwise 90 degrees. Upright Chinese
+     * characters are represented C and c.<br>
+     * <br>
+     * NOTA BENE: enum values here should match goodoc.proto<br>
+     * <br>
+     * If you orient your head so that "up" aligns with Orientation, then the
+     * characters will appear "right side up" and readable.<br>
+     * <br>
+     * In the example above, both the English and Chinese paragraphs are
+     * oriented so their "up" is the top of the page (page up). The photo credit
+     * is read with one's head turned leftward ("up" is to page left).
+     */
+    public static interface TessOrientation {
+
+        int ORIENTATION_PAGE_UP = 0;
+        int ORIENTATION_PAGE_RIGHT = 1;
+        int ORIENTATION_PAGE_DOWN = 2;
+        int ORIENTATION_PAGE_LEFT = 3;
+    }
+
+    /**
+     * Logical layout direction of the grapheme clusters within a line of text,
+     * judged when looking at the text line rotated so that its Orientation is
+     * "page up".<br>
+     * <br>
+     * For English text, the writing direction is left-to-right. For the Chinese
+     * text in the above example, the writing direction is top-to-bottom.
+     */
+    public static interface TessWritingDirection {
+
+        int WRITING_DIRECTION_LEFT_TO_RIGHT = 0;
+        int WRITING_DIRECTION_RIGHT_TO_LEFT = 1;
+        int WRITING_DIRECTION_TOP_TO_BOTTOM = 2;
+    }
+
+    /**
+     * Sequence in which the text lines are read.<br>
+     * <br>
+     * In English, the order is top-to-bottom. In Chinese, vertical text lines
+     * are read right-to-left. Mongolian is written in vertical columns top to
+     * bottom like Chinese, but the lines order left-to right.<br>
+     * <br>
+     * Note that only some combinations make sense. For example,
+     * <code>WRITING_DIRECTION_LEFT_TO_RIGHT</code> implies
+     * <code>TEXTLINE_ORDER_TOP_TO_BOTTOM</code>.
+     */
+    public static interface TessTextlineOrder {
+
+        int TEXTLINE_ORDER_LEFT_TO_RIGHT = 0;
+        int TEXTLINE_ORDER_RIGHT_TO_LEFT = 1;
+        int TEXTLINE_ORDER_TOP_TO_BOTTOM = 2;
+    }
+
+    // integer stand-ins for boolean values
+    // NOTE(review): presumably for native calls that take or return int flags - confirm against callers
+    public static final int TRUE = 1;
+    public static final int FALSE = 0;
+
+    /**
+     * Typed pointer to the base class for all tesseract APIs. Specific classes
+     * can add ability to work on different inputs or produce different
+     * outputs. The native class is mostly an interface layer on top of the
+     * Tesseract instance class that hides the data types, so that its users
+     * don't have to include any other Tesseract headers.
+     */
+    public static class TessBaseAPI extends PointerType {
+
+        /** Delegates to the no-argument <code>PointerType</code> constructor. */
+        public TessBaseAPI() {
+            super();
+        }
+
+        /**
+         * Wraps the given native address.
+         *
+         * @param address pointer handed to <code>PointerType</code>
+         */
+        public TessBaseAPI(Pointer address) {
+            super(address);
+        }
+    }
+
+    /**
+     * Typed pointer to the class that iterates over tesseract page structure,
+     * providing access to all levels of the page hierarchy, without including
+     * any tesseract headers or having to handle any tesseract structures.<br>
+     * WARNING! The native class points to data held within the TessBaseAPI
+     * class, and therefore can only be used while the TessBaseAPI class still
+     * exists and has not been subjected to a call of <code>Init</code>,
+     * <code>SetImage</code>, <code>Recognize</code>, <code>Clear</code>,
+     * <code>End</code> <code>DetectOS</code>, or anything else that changes the
+     * internal <code>PAGE_RES</code>. See <code>apitypes.h</code> for the
+     * definition of <code>PageIteratorLevel</code>. See also
+     * <code>ResultIterator</code>, derived from <code>PageIterator</code>,
+     * which adds in the ability to access OCR output with text-specific
+     * methods.
+     */
+    public static class TessPageIterator extends PointerType {
+
+        /** Delegates to the no-argument <code>PointerType</code> constructor. */
+        public TessPageIterator() {
+            super();
+        }
+
+        /**
+         * Wraps the given native address.
+         *
+         * @param address pointer handed to <code>PointerType</code>
+         */
+        public TessPageIterator(Pointer address) {
+            super(address);
+        }
+    }
+
+    /**
+     * Typed pointer to MutableIterator, which adds access to internal data
+     * structures.
+     */
+    public static class TessMutableIterator extends PointerType {
+
+        /** Delegates to the no-argument <code>PointerType</code> constructor. */
+        public TessMutableIterator() {
+            super();
+        }
+
+        /**
+         * Wraps the given native address.
+         *
+         * @param address pointer handed to <code>PointerType</code>
+         */
+        public TessMutableIterator(Pointer address) {
+            super(address);
+        }
+    }
+
+    /**
+     * Typed pointer to the iterator for tesseract results that is capable of
+     * iterating in proper reading order over Bi Directional (e.g. mixed Hebrew
+     * and English) text. ResultIterator adds text-specific methods for access
+     * to OCR output.
+     */
+    public static class TessResultIterator extends PointerType {
+
+        /** Delegates to the no-argument <code>PointerType</code> constructor. */
+        public TessResultIterator() {
+            super();
+        }
+
+        /**
+         * Wraps the given native address.
+         *
+         * @param address pointer handed to <code>PointerType</code>
+         */
+        public TessResultIterator(Pointer address) {
+            super(address);
+        }
+    }
+
+    /**
+     * Typed pointer for a choice iterator.
+     */
+    public static class TessChoiceIterator extends PointerType {
+
+        /** Delegates to the no-argument <code>PointerType</code> constructor. */
+        public TessChoiceIterator() {
+            super();
+        }
+
+        /**
+         * Wraps the given native address.
+         *
+         * @param address pointer handed to <code>PointerType</code>
+         */
+        public TessChoiceIterator(Pointer address) {
+            super(address);
+        }
+    }
+
+    /**
+     * Typed pointer to the interface for rendering tesseract results into a
+     * document, such as text, HOCR or pdf. The native class is abstract;
+     * specific classes handle individual formats. The interface is then used
+     * to inject the renderer class into tesseract when processing images.
+     *
+     * For simplicity implementing this with tesseract version 3.01, the
+     * renderer contains document state that is cleared from document to
+     * document just as the TessBaseAPI is. This way the base API can just
+     * delegate its rendering functionality to injected renderers, and the
+     * renderers can manage the associated state needed for the specific formats
+     * in addition to the heuristics for producing it.
+     */
+    public static class TessResultRenderer extends PointerType {
+
+        /** Delegates to the no-argument <code>PointerType</code> constructor. */
+        public TessResultRenderer() {
+            super();
+        }
+
+        /**
+         * Wraps the given native address.
+         *
+         * @param address pointer handed to <code>PointerType</code>
+         */
+        public TessResultRenderer(Pointer address) {
+            super(address);
+        }
+    }
+
+    /**
+     * Description of the output of the OCR engine. This structure is used as
+     * both a progress monitor and the final output header, since it needs to be
+     * a valid progress monitor while the OCR engine is storing its output to
+     * shared memory. During progress, all the buffer info is -1. Progress
+     * starts at 0 and increases to 100 during OCR. No other constraint. Every
+     * progress callback, the OCR engine must set <code>ocr_alive</code> to 1.
+     * The HP side will set <code>ocr_alive</code> to 0. Repeated failure to
+     * reset to 1 indicates that the OCR engine is dead. If the cancel function
+     * is not null then it is called with the number of user words found. If it
+     * returns true then operation is cancelled.
+     */
+    public static class ETEXT_DESC extends Structure {
+
+        /**
+         * chars in this buffer(0). Total number of UTF-8 bytes for this run.
+         */
+        public short count;
+        /**
+         * percent complete increasing (0-100)
+         */
+        public short progress;
+        /**
+         * true if not last
+         */
+        public byte more_to_come;
+        /**
+         * ocr sets to 1, HP 0
+         */
+        public byte ocr_alive;
+        /**
+         * for errcode use
+         */
+        public byte err_code;
+        /**
+         * returns true to cancel
+         */
+        public CANCEL_FUNC cancel;
+        /**
+         * this or other data for cancel
+         */
+        public Pointer cancel_this;
+        /**
+         * time to stop if not 0
+         */
+        public TimeVal end_time;
+        /**
+         * character data
+         */
+        public EANYCODE_CHAR[] text = new EANYCODE_CHAR[1];
+
+        /**
+         * Gets Field Order.
+         *
+         * @return the names of the structure's fields, in declaration order
+         */
+        @Override
+        protected List<String> getFieldOrder() {
+            // typed like TimeVal.getFieldOrder() instead of the raw List
+            return Arrays.asList("count", "progress", "more_to_come", "ocr_alive", "err_code", "cancel", "cancel_this", "end_time", "text");
+        }
+    }
+
+    /**
+     * Single byte of recognized character data with its bounding box and
+     * attributes.<br>
+     * <br>
+     * It should be noted that the format for char_code for version 2.0 and
+     * beyond is UTF-8, which means that ASCII characters will come out as one
+     * structure but other characters will be returned in two or more instances
+     * of this structure with a single byte of the UTF-8 code in each, but each
+     * will have the same bounding box.<br>
+     * <br>
+     * Programs which want to handle languages with different characters sets
+     * will need to handle extended characters appropriately, but
+     * <strong>all</strong>
+     * code needs to be prepared to receive UTF-8 coded characters for
+     * characters such as bullet and fancy quotes.
+     */
+    public static class EANYCODE_CHAR extends Structure {
+
+        /**
+         * character itself, one single UTF-8 byte long. A Unicode character may
+         * consist of one or more UTF-8 bytes. Bytes of a character will have
+         * the same bounding box.
+         */
+        public byte char_code;
+        /**
+         * left of char (-1)
+         */
+        public short left;
+        /**
+         * right of char (-1)
+         */
+        public short right;
+        /**
+         * top of char (-1)
+         */
+        public short top;
+        /**
+         * bottom of char (-1)
+         */
+        public short bottom;
+        /**
+         * what font (0)
+         */
+        public short font_index;
+        /**
+         * classification confidence: 0=perfect, 100=reject (0/100)
+         */
+        public byte confidence;
+        /**
+         * point size of char, 72 = 1 inch, (10)
+         */
+        public byte point_size;
+        /**
+         * number of spaces before this char (1)
+         */
+        public byte blanks;
+        /**
+         * char formatting (0)
+         */
+        public byte formatting;
+
+        /**
+         * Gets Field Order.
+         *
+         * @return the names of the structure's fields, in declaration order
+         */
+        @Override
+        protected List<String> getFieldOrder() {
+            // typed like TimeVal.getFieldOrder() instead of the raw List
+            return Arrays.asList("char_code", "left", "right", "top", "bottom", "font_index", "confidence", "point_size", "blanks", "formatting");
+        }
+    }
+
+    /**
+     * Callback type of the <code>cancel</code> field of
+     * <code>ETEXT_DESC</code> (<code>cancel_func</code>).
+     */
+    interface CANCEL_FUNC extends Callback {
+
+        /**
+         * Decides whether the running operation is to be cancelled.
+         *
+         * @param cancel_this this or other data for cancel
+         * @param words the number of user words found
+         * @return <code>true</code> to cancel
+         */
+        boolean invoke(Pointer cancel_this, int words);
+    }
+
+    /**
+     * Time value made of a seconds and a microseconds part; it is the type of
+     * <code>ETEXT_DESC.end_time</code>.
+     */
+    public static class TimeVal extends Structure {
+
+        /**
+         * seconds
+         */
+        public NativeLong tv_sec;
+        /**
+         * microseconds
+         */
+        public NativeLong tv_usec;
+
+        /**
+         * Gets Field Order.
+         *
+         * @return the names of the structure's fields, in declaration order
+         */
+        @Override
+        protected List<String> getFieldOrder() {
+            return Arrays.asList("tv_sec", "tv_usec");
+        }
+    }
+}
--- a/Tess4J/src/net/sourceforge/tess4j/ITesseract.java
+++ b/Tess4J/src/net/sourceforge/tess4j/ITesseract.java
@@ -0,0 +1,236 @@
+/**
+ * Copyright @ 2014 Quan Nguyen
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package net.sourceforge.tess4j;
+
+import java.awt.Rectangle;
+import java.awt.image.BufferedImage;
+import java.io.File;
+import java.nio.ByteBuffer;
+import java.util.List;
+import javax.imageio.IIOImage;
+
+/**
+ * An interface that declares the common OCR operations: recognizing text from
+ * files, buffered images, image lists and raw pixel buffers, configuring the
+ * engine, rendering documents, and querying regions and words.
+ */
+public interface ITesseract {
+
+    // Opening markup (doctype, head and <body> tag) of an HTML document that
+    // identifies Tesseract as the OCR system.
+    String htmlBeginTag = "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\""
+            + " \"http://www.w3.org/TR/html4/loose.dtd\">\n"
+            + "<html>\n<head>\n<title></title>\n"
+            + "<meta http-equiv=\"Content-Type\" content=\"text/html;"
+            + "charset=utf-8\" />\n<meta name='ocr-system' content='tesseract'/>\n"
+            + "</head>\n<body>\n";
+    // Closing markup matching htmlBeginTag.
+    String htmlEndTag = "</body>\n</html>\n";
+
+    /**
+     * Rendered formats supported by Tesseract.
+     */
+    public enum RenderedFormat {
+
+        TEXT, HOCR, PDF, UNLV, BOX
+    }
+
+    /**
+     * Performs OCR on a whole image file.
+     *
+     * @param imageFile an image file
+     * @return the recognized text
+     * @throws TesseractException if the OCR operation fails
+     */
+    String doOCR(File imageFile) throws TesseractException;
+
+    /**
+     * Performs OCR on a region of an image file.
+     *
+     * @param imageFile an image file
+     * @param rect the bounding rectangle that defines the region of the image
+     * to be recognized. A rectangle of zero dimension or <code>null</code>
+     * indicates the whole image.
+     * @return the recognized text
+     * @throws TesseractException if the OCR operation fails
+     */
+    String doOCR(File imageFile, Rectangle rect) throws TesseractException;
+
+    /**
+     * Performs OCR on a whole buffered image.
+     *
+     * @param bi a buffered image
+     * @return the recognized text
+     * @throws TesseractException if the OCR operation fails
+     */
+    String doOCR(BufferedImage bi) throws TesseractException;
+
+    /**
+     * Performs OCR on a region of a buffered image.
+     *
+     * @param bi a buffered image
+     * @param rect the bounding rectangle that defines the region of the image
+     * to be recognized. A rectangle of zero dimension or <code>null</code>
+     * indicates the whole image.
+     * @return the recognized text
+     * @throws TesseractException if the OCR operation fails
+     */
+    String doOCR(BufferedImage bi, Rectangle rect) throws TesseractException;
+
+    /**
+     * Performs OCR on a list of images.
+     *
+     * @param imageList a list of <code>IIOImage</code> objects
+     * @param rect the bounding rectangle that defines the region of the image
+     * to be recognized. A rectangle of zero dimension or <code>null</code>
+     * indicates the whole image.
+     * @return the recognized text
+     * @throws TesseractException if the OCR operation fails
+     */
+    String doOCR(List<IIOImage> imageList, Rectangle rect) throws TesseractException;
+
+    /**
+     * Performs OCR on a list of images that originate from a named input
+     * file.
+     *
+     * @param imageList a list of <code>IIOImage</code> objects
+     * @param filename input file name. Needed only for training and reading a
+     * UNLV zone file.
+     * @param rect the bounding rectangle that defines the region of the image
+     * to be recognized. A rectangle of zero dimension or <code>null</code>
+     * indicates the whole image.
+     * @return the recognized text
+     * @throws TesseractException if the OCR operation fails
+     */
+    String doOCR(List<IIOImage> imageList, String filename, Rectangle rect) throws TesseractException;
+
+    /**
+     * Performs OCR on raw pixel data. Use <code>SetImage</code>, (optionally)
+     * <code>SetRectangle</code>, and one or more of the <code>Get*Text</code>
+     * functions.
+     *
+     * @param xsize width of image
+     * @param ysize height of image
+     * @param buf pixel data
+     * @param rect the bounding rectangle that defines the region of the image
+     * to be recognized. A rectangle of zero dimension or <code>null</code>
+     * indicates the whole image.
+     * @param bpp bits per pixel, represents the bit depth of the image, with 1
+     * for binary bitmap, 8 for gray, and 24 for color RGB.
+     * @return the recognized text
+     * @throws TesseractException if the OCR operation fails
+     */
+    String doOCR(int xsize, int ysize, ByteBuffer buf, Rectangle rect, int bpp) throws TesseractException;
+
+    /**
+     * Performs OCR on raw pixel data that originates from a named input file.
+     * Use <code>SetImage</code>, (optionally) <code>SetRectangle</code>, and
+     * one or more of the <code>Get*Text</code> functions.
+     *
+     * @param xsize width of image
+     * @param ysize height of image
+     * @param buf pixel data
+     * @param filename input file name. Needed only for training and reading a
+     * UNLV zone file.
+     * @param rect the bounding rectangle that defines the region of the image
+     * to be recognized. A rectangle of zero dimension or <code>null</code>
+     * indicates the whole image.
+     * @param bpp bits per pixel, represents the bit depth of the image, with 1
+     * for binary bitmap, 8 for gray, and 24 for color RGB.
+     * @return the recognized text
+     * @throws TesseractException if the OCR operation fails
+     */
+    String doOCR(int xsize, int ysize, ByteBuffer buf, String filename, Rectangle rect, int bpp) throws TesseractException;
+
+    /**
+     * Sets tessdata path.
+     *
+     * @param datapath the tessdata path to set
+     */
+    void setDatapath(String datapath);
+
+    /**
+     * Sets language for OCR.
+     *
+     * @param language the language code, which follows ISO 639-3 standard.
+     */
+    void setLanguage(String language);
+
+    /**
+     * Sets OCR engine mode.
+     *
+     * @param ocrEngineMode the OcrEngineMode to set
+     */
+    void setOcrEngineMode(int ocrEngineMode);
+
+    /**
+     * Sets page segmentation mode.
+     *
+     * @param mode the page segmentation mode to set
+     */
+    void setPageSegMode(int mode);
+
+    /**
+     * Sets the value of one of Tesseract's internal parameters.
+     *
+     * @param key variable name, e.g., <code>tessedit_create_hocr</code>,
+     * <code>tessedit_char_whitelist</code>, etc.
+     * @param value value for corresponding variable, e.g., "1", "0",
+     * "0123456789", etc.
+     */
+    void setTessVariable(String key, String value);
+
+    /**
+     * Sets configs to be passed to Tesseract's <code>Init</code> method.
+     *
+     * @param configs list of config filenames, e.g., "digits", "bazaar",
+     * "quiet"
+     */
+    void setConfigs(List<String> configs);
+
+    /**
+     * Creates documents for the given renderers from a single input file.
+     *
+     * @param filename input image
+     * @param outputbase output filename without extension
+     * @param formats types of renderers
+     * @throws TesseractException if the documents cannot be created
+     */
+    void createDocuments(String filename, String outputbase, List<RenderedFormat> formats) throws TesseractException;
+
+    /**
+     * Creates documents for the given renderers from several input files.
+     *
+     * @param filenames array of input files
+     * @param outputbases array of output filenames without extension
+     * @param formats types of renderers
+     * @throws TesseractException if the documents cannot be created
+     */
+    void createDocuments(String[] filenames, String[] outputbases, List<RenderedFormat> formats) throws TesseractException;
+
+    /**
+     * Gets segmented regions at specified page iterator level.
+     *
+     * @param bi input image
+     * @param pageIteratorLevel TessPageIteratorLevel enum
+     * @return list of <code>Rectangle</code>
+     * @throws TesseractException if the regions cannot be determined
+     */
+    List<Rectangle> getSegmentedRegions(BufferedImage bi, int pageIteratorLevel) throws TesseractException;
+
+    /**
+     * Gets recognized words at specified page iterator level. Unlike the other
+     * recognition methods, this one declares no checked exception.
+     *
+     * @param bi input image
+     * @param pageIteratorLevel TessPageIteratorLevel enum
+     * @return list of <code>Word</code>
+     */
+    List<Word> getWords(BufferedImage bi, int pageIteratorLevel);
+}
--- a/Tess4J/src/net/sourceforge/tess4j/TessAPI.java
+++ b/Tess4J/src/net/sourceforge/tess4j/TessAPI.java
--- a/Tess4J/src/net/sourceforge/tess4j/TessAPI1.java
+++ b/Tess4J/src/net/sourceforge/tess4j/TessAPI1.java
--- a/Tess4J/src/net/sourceforge/tess4j/Tesseract.java
+++ b/Tess4J/src/net/sourceforge/tess4j/Tesseract.java
@@ -0,0 +1,682 @@
+/**
+ * Copyright @ 2012 Quan Nguyen
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package net.sourceforge.tess4j;
+
+import com.sun.jna.Pointer;
+import com.sun.jna.StringArray;
+import com.sun.jna.ptr.PointerByReference;
+import java.awt.Rectangle;
+import java.awt.image.*;
+import java.io.*;
+import java.nio.ByteBuffer;
+import java.nio.IntBuffer;
+import java.util.*;
+import javax.imageio.IIOImage;
+import net.sourceforge.lept4j.Box;
+import net.sourceforge.lept4j.Boxa;
+import static net.sourceforge.lept4j.ILeptonica.L_CLONE;
+import net.sourceforge.lept4j.Leptonica;
+import static net.sourceforge.tess4j.ITessAPI.TRUE;
+
+import net.sourceforge.tess4j.ITessAPI.TessBaseAPI;
+import net.sourceforge.tess4j.ITessAPI.TessOcrEngineMode;
+import net.sourceforge.tess4j.ITessAPI.TessPageIterator;
+import net.sourceforge.tess4j.ITessAPI.TessResultIterator;
+import net.sourceforge.tess4j.ITessAPI.TessResultRenderer;
+
+import net.sourceforge.tess4j.util.ImageIOHelper;
+import net.sourceforge.tess4j.util.LoggHelper;
+import net.sourceforge.tess4j.util.PdfUtilities;
+import org.slf4j.*;
+
+/**
+ * An object layer on top of <code>TessAPI</code>, provides character
+ * recognition support for common image formats, and multi-page TIFF images
+ * beyond the uncompressed, binary TIFF format supported by Tesseract OCR
+ * engine. The extended capabilities are provided by the
+ * <code>Java Advanced Imaging Image I/O Tools</code>.<br>
+ * <br>
+ * Support for PDF documents is available through <code>Ghost4J</code>, a
+ * <code>JNA</code> wrapper for <code>GPL Ghostscript</code>, which should be
+ * installed and included in system path.<br>
+ * <br>
+ * Any program that uses the library will need to ensure that the required
+ * libraries (the <code>.jar</code> files for <code>jna</code>,
+ * <code>jai-imageio</code>, and <code>ghost4j</code>) are in its compile and
+ * run-time <code>classpath</code>.
+ */
+public class Tesseract implements ITesseract {
+
+    private static Tesseract instance;
+    private String language = "eng";
+    private String datapath;
+    private RenderedFormat renderedFormat = RenderedFormat.TEXT;
+    private int psm = -1;
+    private int ocrEngineMode = TessOcrEngineMode.OEM_DEFAULT;
+    private final Properties prop = new Properties();
+    private final List<String> configList = new ArrayList<String>();
+
+    private TessAPI api;
+    private TessBaseAPI handle;
+
+    private static final org.slf4j.Logger logger = LoggerFactory.getLogger(new LoggHelper().toString());
+    
+    /**
+     * Creates an instance whose data path comes from the
+     * <code>TESSDATA_PREFIX</code> environment variable, falling back to
+     * <code>"./"</code> when the variable is unset or cannot be read.
+     */
+    public Tesseract() {
+        String prefix = null;
+        try {
+            prefix = System.getenv("TESSDATA_PREFIX");
+        } catch (Exception ignored) {
+            // environment is not readable; the default below applies
+        }
+        datapath = (prefix != null) ? prefix : "./";
+    }
+
+    /**
+     * Returns the TessAPI object currently held by this instance. The field
+     * is assigned in {@link #init()}, so it is <code>null</code> until
+     * <code>init()</code> has run at least once.
+     *
+     * @return api
+     */
+    protected TessAPI getAPI() {
+        return api;
+    }
+
+    /**
+     * Returns the API handle currently held by this instance. The field is
+     * assigned in {@link #init()} and is not cleared by {@link #dispose()},
+     * so after a completed operation it still refers to the deleted handle.
+     *
+     * @return handle
+     */
+    protected TessBaseAPI getHandle() {
+        return handle;
+    }
+
+    /**
+     * Returns the shared instance of this class, creating it on first use.
+     * The method is synchronized, so concurrent first calls create only one
+     * instance.
+     *
+     * @deprecated As of Release 2.0, use default constructor instead.
+     * @return instance
+     */
+    @Deprecated
+    public static synchronized Tesseract getInstance() {
+        if (instance != null) {
+            return instance;
+        }
+
+        instance = new Tesseract();
+        return instance;
+    }
+
+    /**
+     * Sets path to <code>tessdata</code>, replacing the value chosen by the
+     * constructor. The stored path is handed to the engine by
+     * {@link #init()}.
+     *
+     * @param datapath the tessdata path to set
+     */
+    @Override
+    public void setDatapath(String datapath) {
+        this.datapath = datapath;
+    }
+
+    /**
+     * Sets language for OCR. The initial value is <code>"eng"</code>; the
+     * stored code is handed to the engine by {@link #init()}.
+     *
+     * @param language the language code, which follows ISO 639-3 standard.
+     */
+    @Override
+    public void setLanguage(String language) {
+        this.language = language;
+    }
+
+    /**
+     * Sets OCR engine mode. The initial value is
+     * <code>TessOcrEngineMode.OEM_DEFAULT</code>; the stored mode is handed to
+     * the engine by {@link #init()}.
+     *
+     * @param ocrEngineMode the OcrEngineMode to set
+     */
+    @Override
+    public void setOcrEngineMode(int ocrEngineMode) {
+        this.ocrEngineMode = ocrEngineMode;
+    }
+
+    /**
+     * Sets page segmentation mode. The initial value is <code>-1</code>;
+     * {@link #init()} forwards the mode to the engine only when it is greater
+     * than <code>-1</code>.
+     *
+     * @param mode the page segmentation mode to set
+     */
+    @Override
+    public void setPageSegMode(int mode) {
+        this.psm = mode;
+    }
+
+    /**
+     * Switches between hOCR and plain-text output. Both the rendered format of
+     * this instance and the <code>tessedit_create_hocr</code> variable are
+     * updated together.
+     *
+     * @param hocr <code>true</code> to produce hOCR, <code>false</code> to
+     * produce plain text
+     */
+    public void setHocr(boolean hocr) {
+        if (hocr) {
+            this.renderedFormat = RenderedFormat.HOCR;
+            prop.setProperty("tessedit_create_hocr", "1");
+        } else {
+            this.renderedFormat = RenderedFormat.TEXT;
+            prop.setProperty("tessedit_create_hocr", "0");
+        }
+    }
+
+    /**
+     * Set the value of Tesseract's internal parameter. The pair is only
+     * recorded here; {@link #setTessVariables()} later passes every recorded
+     * pair to the engine.
+     *
+     * @param key variable name, e.g., <code>tessedit_create_hocr</code>,
+     * <code>tessedit_char_whitelist</code>, etc.
+     * @param value value for corresponding variable, e.g., "1", "0",
+     * "0123456789", etc.
+     */
+    @Override
+    public void setTessVariable(String key, String value) {
+        prop.setProperty(key, value);
+    }
+
+    /**
+     * Replaces the configs that are passed to Tesseract's <code>Init</code>
+     * method. The given list is copied; passing <code>null</code> simply
+     * clears the current configs.
+     *
+     * @param configs list of config filenames, e.g., "digits", "bazaar",
+     * "quiet"
+     */
+    @Override
+    public void setConfigs(List<String> configs) {
+        configList.clear();
+        if (configs == null) {
+            return;
+        }
+        configList.addAll(configs);
+    }
+
+    /**
+     * Performs OCR on the whole image file by delegating to
+     * {@link #doOCR(File, Rectangle)} with a <code>null</code> rectangle.
+     *
+     * @param imageFile an image file
+     * @return the recognized text
+     * @throws TesseractException if the delegated call fails
+     */
+    @Override
+    public String doOCR(File imageFile) throws TesseractException {
+        return doOCR(imageFile, null);
+    }
+
+    /**
+     * Performs OCR operation.
+     *
+     * @param imageFile an image file
+     * @param rect the bounding rectangle defines the region of the image to be
+     * recognized. A rectangle of zero dimension or <code>null</code> indicates
+     * the whole image.
+     * @return the recognized text
+     * @throws TesseractException
+     */
+    @Override
+    public String doOCR(File imageFile, Rectangle rect) throws TesseractException {
+        try {
+            return doOCR(ImageIOHelper.getIIOImageList(imageFile), imageFile.getPath(), rect);
+        } catch (Exception e) {
+            logger.error(e.getMessage(), e);
+            throw new TesseractException(e);
+        }
+    }
+
+    /**
+     * Performs OCR on the whole buffered image by delegating to
+     * {@link #doOCR(BufferedImage, Rectangle)} with a <code>null</code>
+     * rectangle.
+     *
+     * @param bi a buffered image
+     * @return the recognized text
+     * @throws TesseractException if the delegated call fails
+     */
+    @Override
+    public String doOCR(BufferedImage bi) throws TesseractException {
+        return doOCR(bi, null);
+    }
+
+    /**
+     * Performs OCR operation.
+     *
+     * @param bi a buffered image
+     * @param rect the bounding rectangle defines the region of the image to be
+     * recognized. A rectangle of zero dimension or <code>null</code> indicates
+     * the whole image.
+     * @return the recognized text
+     * @throws TesseractException
+     */
+    @Override
+    public String doOCR(BufferedImage bi, Rectangle rect) throws TesseractException {
+        try {
+            return doOCR(ImageIOHelper.getIIOImageList(bi), rect);
+        } catch (Exception e) {
+            logger.error(e.getMessage(), e);
+            throw new TesseractException(e);
+        }
+    }
+
+    /**
+     * Performs OCR on a list of images by delegating to
+     * {@link #doOCR(List, String, Rectangle)} with a <code>null</code> file
+     * name.
+     *
+     * @param imageList a list of <code>IIOImage</code> objects
+     * @param rect the bounding rectangle that defines the region of the image
+     * to be recognized. A rectangle of zero dimension or <code>null</code>
+     * indicates the whole image.
+     * @return the recognized text
+     * @throws TesseractException if the delegated call fails
+     */
+    @Override
+    public String doOCR(List<IIOImage> imageList, Rectangle rect) throws TesseractException {
+        return doOCR(imageList, null, rect);
+    }
+
+    /**
+     * Performs OCR on every image of the list and concatenates the results.
+     * Pages are numbered from 1 in list order. An image that raises an
+     * <code>IOException</code> is logged and skipped. When the rendered format
+     * is hOCR the combined text is wrapped in the HTML begin and end tags.
+     * The engine is initialized on entry and disposed on exit.
+     *
+     * @param imageList a list of <code>IIOImage</code> objects
+     * @param filename input file name. Needed only for training and reading a
+     * UNLV zone file.
+     * @param rect the bounding rectangle that defines the region of the image
+     * to be recognized. A rectangle of zero dimension or <code>null</code>
+     * indicates the whole image.
+     * @return the recognized text
+     * @throws TesseractException declared for implementations and overrides;
+     * this body does not throw it itself
+     */
+    @Override
+    public String doOCR(List<IIOImage> imageList, String filename, Rectangle rect) throws TesseractException {
+        init();
+        setTessVariables();
+
+        try {
+            StringBuilder text = new StringBuilder();
+            Iterator<IIOImage> pages = imageList.iterator();
+
+            for (int pageNum = 1; pages.hasNext(); pageNum++) {
+                IIOImage page = pages.next();
+                try {
+                    setImage(page.getRenderedImage(), rect);
+                    text.append(getOCRText(filename, pageNum));
+                } catch (IOException pageFailure) {
+                    // skip the problematic image
+                    logger.error(pageFailure.getMessage(), pageFailure);
+                }
+            }
+
+            if (renderedFormat != RenderedFormat.HOCR) {
+                return text.toString();
+            }
+
+            // hOCR fragments need the surrounding HTML document
+            return htmlBeginTag + text + htmlEndTag;
+        } finally {
+            dispose();
+        }
+    }
+
+    /**
+     * Performs OCR on raw pixel data by delegating to
+     * {@link #doOCR(int, int, ByteBuffer, String, Rectangle, int)} with a
+     * <code>null</code> file name. Use <code>SetImage</code>, (optionally)
+     * <code>SetRectangle</code>, and one or more of the <code>Get*Text</code>
+     * functions.
+     *
+     * @param xsize width of image
+     * @param ysize height of image
+     * @param buf pixel data
+     * @param rect the bounding rectangle that defines the region of the image
+     * to be recognized. A rectangle of zero dimension or <code>null</code>
+     * indicates the whole image.
+     * @param bpp bits per pixel, represents the bit depth of the image, with 1
+     * for binary bitmap, 8 for gray, and 24 for color RGB.
+     * @return the recognized text
+     * @throws TesseractException if the delegated call fails
+     */
+    @Override
+    public String doOCR(int xsize, int ysize, ByteBuffer buf, Rectangle rect, int bpp) throws TesseractException {
+        return doOCR(xsize, ysize, buf, null, rect, bpp);
+    }
+
+    /**
+     * Performs OCR operation. Use <code>SetImage</code>, (optionally)
+     * <code>SetRectangle</code>, and one or more of the <code>Get*Text</code>
+     * functions.
+     *
+     * @param xsize width of image
+     * @param ysize height of image
+     * @param buf pixel data
+     * @param filename input file name. Needed only for training and reading a
+     * UNLV zone file.
+     * @param rect the bounding rectangle defines the region of the image to be
+     * recognized. A rectangle of zero dimension or <code>null</code> indicates
+     * the whole image.
+     * @param bpp bits per pixel, represents the bit depth of the image, with 1
+     * for binary bitmap, 8 for gray, and 24 for color RGB.
+     * @return the recognized text
+     * @throws TesseractException
+     */
+    @Override
+    public String doOCR(int xsize, int ysize, ByteBuffer buf, String filename, Rectangle rect, int bpp) throws TesseractException {
+        init();
+        setTessVariables();
+
+        try {
+            setImage(xsize, ysize, buf, rect, bpp);
+            return getOCRText(filename, 1);
+        } catch (Exception e) {
+            logger.error(e.getMessage(), e);
+            throw new TesseractException(e);
+        } finally {
+            dispose();
+        }
+    }
+
+    /**
+     * Initializes Tesseract engine: obtains the API instance, creates a new
+     * handle, and initializes it with the current data path, language, engine
+     * mode and config names. The page segmentation mode is applied afterwards,
+     * but only when one has been set (a value of 0 or more).
+     */
+    protected void init() {
+        api = TessAPI.INSTANCE;
+        handle = api.TessBaseAPICreate();
+
+        String[] configNames = configList.toArray(new String[0]);
+        StringArray nativeConfigs = new StringArray(configNames);
+        PointerByReference configsRef = new PointerByReference();
+        configsRef.setPointer(nativeConfigs);
+        api.TessBaseAPIInit1(handle, datapath, language, ocrEngineMode, configsRef, configList.size());
+
+        if (psm >= 0) {
+            api.TessBaseAPISetPageSegMode(handle, psm);
+        }
+    }
+
+    /**
+     * Passes every recorded variable name and value to the engine through the
+     * current handle.
+     */
+    protected void setTessVariables() {
+        for (Enumeration<?> names = prop.propertyNames(); names.hasMoreElements();) {
+            String name = (String) names.nextElement();
+            String value = prop.getProperty(name);
+            api.TessBaseAPISetVariable(handle, name, value);
+        }
+    }
+
+    /**
+     * A wrapper for {@link #setImage(int, int, ByteBuffer, Rectangle, int)}
+     * that takes the width, height, pixel buffer and pixel size from a
+     * rendered image.
+     *
+     * @param image a rendered image
+     * @param rect region of interest
+     * @throws java.io.IOException if the pixel buffer cannot be obtained
+     */
+    protected void setImage(RenderedImage image, Rectangle rect) throws IOException {
+        int width = image.getWidth();
+        int height = image.getHeight();
+        ByteBuffer pixels = ImageIOHelper.getImageByteBuffer(image);
+        int bitsPerPixel = image.getColorModel().getPixelSize();
+        setImage(width, height, pixels, rect, bitsPerPixel);
+    }
+
+    /**
+     * Sets image to be processed. Bytes per pixel is <code>bpp / 8</code>
+     * using integer division (so 0 for a 1-bit image), and bytes per line is
+     * <code>xsize * bpp / 8</code> rounded up. A region is set only when
+     * <code>rect</code> is non-null and non-empty.
+     *
+     * @param xsize width of image
+     * @param ysize height of image
+     * @param buf pixel data
+     * @param rect the bounding rectangle that defines the region of the image
+     * to be recognized. A rectangle of zero dimension or <code>null</code>
+     * indicates the whole image.
+     * @param bpp bits per pixel, represents the bit depth of the image, with 1
+     * for binary bitmap, 8 for gray, and 24 for color RGB.
+     */
+    protected void setImage(int xsize, int ysize, ByteBuffer buf, Rectangle rect, int bpp) {
+        int bytesPerPixel = bpp / 8;
+        int bytesPerLine = (int) Math.ceil(xsize * bpp / 8.0);
+        api.TessBaseAPISetImage(handle, buf, xsize, ysize, bytesPerPixel, bytesPerLine);
+
+        boolean wholeImage = rect == null || rect.isEmpty();
+        if (wholeImage) {
+            return;
+        }
+
+        api.TessBaseAPISetRectangle(handle, rect.x, rect.y, rect.width, rect.height);
+    }
+
+    /**
+     * Gets recognized text for the image currently set on the handle. hOCR
+     * text is requested when the rendered format is hOCR, plain UTF-8 text
+     * otherwise. The native text buffer is released before returning.
+     *
+     * @param filename input file name. Needed only for reading a UNLV zone
+     * file. Ignored when <code>null</code> or empty.
+     * @param pageNum page number, starting at 1; needed for hocr paging, where
+     * it is passed on as <code>pageNum - 1</code>.
+     * @return the recognized text
+     */
+    protected String getOCRText(String filename, int pageNum) {
+        boolean hasInputName = filename != null && !filename.isEmpty();
+        if (hasInputName) {
+            api.TessBaseAPISetInputName(handle, filename);
+        }
+
+        Pointer textPtr;
+        if (renderedFormat == RenderedFormat.HOCR) {
+            textPtr = api.TessBaseAPIGetHOCRText(handle, pageNum - 1);
+        } else {
+            textPtr = api.TessBaseAPIGetUTF8Text(handle);
+        }
+
+        String text = textPtr.getString(0);
+        api.TessDeleteText(textPtr);
+        return text;
+    }
+
+    /**
+     * Creates one renderer per requested format and links them into a single
+     * chain: the first renderer created becomes the head and every later one
+     * is inserted into it.
+     *
+     * @param outputbase output filename without extension
+     * @param formats types of renderers, in the order they are to be created
+     * @return the head of the renderer chain, or <code>null</code> when
+     * <code>formats</code> is empty
+     */
+    private TessResultRenderer createRenderers(String outputbase, List<RenderedFormat> formats) {
+        TessResultRenderer head = null;
+
+        for (RenderedFormat format : formats) {
+            TessResultRenderer next;
+
+            switch (format) {
+                case TEXT:
+                    next = api.TessTextRendererCreate(outputbase);
+                    break;
+                case HOCR:
+                    next = api.TessHOcrRendererCreate(outputbase);
+                    break;
+                case PDF:
+                    // the PDF renderer also needs the engine's data path
+                    String dataPath = api.TessBaseAPIGetDatapath(handle);
+                    next = api.TessPDFRendererCreate(outputbase, dataPath);
+                    break;
+                case BOX:
+                    next = api.TessBoxTextRendererCreate(outputbase);
+                    break;
+                case UNLV:
+                    next = api.TessUnlvRendererCreate(outputbase);
+                    break;
+                default:
+                    continue;
+            }
+
+            if (head == null) {
+                head = next;
+            } else {
+                api.TessResultRendererInsert(head, next);
+            }
+        }
+
+        return head;
+    }
+
+    /**
+     * Creates documents for given renderer from a single input file by
+     * delegating to {@link #createDocuments(String[], String[], List)} with
+     * one-element arrays.
+     *
+     * @param filename input image
+     * @param outputbase output filename without extension
+     * @param formats types of renderer
+     * @throws TesseractException if the delegated call fails
+     */
+    @Override
+    public void createDocuments(String filename, String outputbase, List<RenderedFormat> formats) throws TesseractException {
+        createDocuments(new String[]{filename}, new String[]{outputbase}, formats);
+    }
+
+    /**
+     * Creates documents for each input file in turn. An input whose name ends
+     * in <code>.pdf</code> is first converted to a temporary multi-page TIFF,
+     * which is deleted afterwards. A file that fails is logged and skipped so
+     * that the remaining files are still processed. The engine is initialized
+     * on entry and disposed on exit.
+     *
+     * @param filenames array of input files
+     * @param outputbases array of output filenames without extension; must
+     * have the same length as <code>filenames</code>
+     * @param formats types of renderer
+     * @throws IllegalArgumentException if the two arrays differ in length
+     * @throws TesseractException declared by the interface; failures of
+     * individual files are logged rather than thrown
+     */
+    @Override
+    public void createDocuments(String[] filenames, String[] outputbases, List<RenderedFormat> formats) throws TesseractException {
+        if (filenames.length != outputbases.length) {
+            throw new IllegalArgumentException("The two arrays must match in length.");
+        }
+
+        init();
+        setTessVariables();
+
+        try {
+            for (int i = 0; i < filenames.length; i++) {
+                File workingTiffFile = null;
+                try {
+                    String filename = filenames[i];
+
+                    // if PDF, convert to multi-page TIFF
+                    if (filename.toLowerCase().endsWith(".pdf")) {
+                        workingTiffFile = PdfUtilities.convertPdf2Tiff(new File(filename));
+                        filename = workingTiffFile.getPath();
+                    }
+
+                    TessResultRenderer renderer = createRenderers(outputbases[i], formats);
+                    try {
+                        createDocuments(filename, renderer);
+                    } finally {
+                        // release the native renderer even when processing fails
+                        api.TessDeleteResultRenderer(renderer);
+                    }
+                } catch (Exception e) {
+                    // skip the problematic image file
+                    logger.error(e.getMessage(), e);
+                } finally {
+                    if (workingTiffFile != null && workingTiffFile.exists()) {
+                        workingTiffFile.delete();
+                    }
+                }
+            }
+        } finally {
+            dispose();
+        }
+    }
+
+    /**
+     * Runs page processing for one input file with the given renderer chain.
+     *
+     * @param filename input file
+     * @param renderer renderer
+     * @throws TesseractException if the engine reports that processing failed
+     */
+    private void createDocuments(String filename, TessResultRenderer renderer) throws TesseractException {
+        // the input name is needed for reading a UNLV zone file
+        api.TessBaseAPISetInputName(handle, filename);
+
+        boolean failed = api.TessBaseAPIProcessPages(handle, filename, null, 0, renderer) == ITessAPI.FALSE;
+        if (failed) {
+            throw new TesseractException("Error during processing page.");
+        }
+    }
+
+    /**
+     * Gets segmented regions at specified page iterator level. Each box
+     * reported by the engine is copied into a <code>Rectangle</code> and the
+     * native box is then released. When the engine reports no boxes at all an
+     * empty list is returned. The engine is initialized on entry and disposed
+     * on exit.
+     *
+     * @param bi input image
+     * @param pageIteratorLevel TessPageIteratorLevel enum
+     * @return list of <code>Rectangle</code>; never <code>null</code>
+     * @throws TesseractException wrapping an <code>IOException</code> raised
+     * while setting the image; the exception is logged first
+     */
+    @Override
+    public List<Rectangle> getSegmentedRegions(BufferedImage bi, int pageIteratorLevel) throws TesseractException {
+        init();
+        setTessVariables();
+
+        try {
+            List<Rectangle> list = new ArrayList<Rectangle>();
+            setImage(bi, null);
+
+            Boxa boxes = api.TessBaseAPIGetComponentImages(handle, pageIteratorLevel, TRUE, null, null);
+            if (boxes == null) {
+                // no box array came back (e.g. nothing was found on the page);
+                // there is nothing to copy and nothing to destroy
+                return list;
+            }
+
+            Leptonica leptInstance = Leptonica.INSTANCE;
+            int boxCount = leptInstance.boxaGetCount(boxes);
+            for (int i = 0; i < boxCount; i++) {
+                Box box = leptInstance.boxaGetBox(boxes, i, L_CLONE);
+                if (box == null) {
+                    continue;
+                }
+                list.add(new Rectangle(box.x, box.y, box.w, box.h));
+                // release the clone obtained above
+                PointerByReference pRef = new PointerByReference();
+                pRef.setValue(box.getPointer());
+                leptInstance.boxDestroy(pRef);
+            }
+
+            // release the box array itself
+            PointerByReference pRef = new PointerByReference();
+            pRef.setValue(boxes.getPointer());
+            leptInstance.boxaDestroy(pRef);
+
+            return list;
+        } catch (IOException ioe) {
+            // skip the problematic image
+            logger.error(ioe.getMessage(), ioe);
+            throw new TesseractException(ioe);
+        } finally {
+            dispose();
+        }
+    }
+
+    /**
+     * Gets recognized words at specified page iterator level. Positions for
+     * which the engine returns no text are skipped. This method is
+     * best-effort: if anything fails, the failure is logged and the words
+     * collected so far are returned. The engine is initialized on entry and
+     * disposed on exit.
+     *
+     * @param bi input image
+     * @param pageIteratorLevel TessPageIteratorLevel enum
+     * @return list of <code>Word</code>; never <code>null</code>, possibly
+     * empty
+     */
+    @Override
+    public List<Word> getWords(BufferedImage bi, int pageIteratorLevel) {
+        this.init();
+        this.setTessVariables();
+
+        List<Word> words = new ArrayList<Word>();
+
+        try {
+            setImage(bi, null);
+
+            api.TessBaseAPIRecognize(handle, null);
+            TessResultIterator ri = api.TessBaseAPIGetIterator(handle);
+            if (ri == null) {
+                // no iterator was returned, so there is nothing to walk
+                return words;
+            }
+            // NOTE(review): ri is not released explicitly here - confirm
+            // whether the API offers a delete call for result iterators
+            TessPageIterator pi = api.TessResultIteratorGetPageIterator(ri);
+            api.TessPageIteratorBegin(pi);
+
+            do {
+                Pointer ptr = api.TessResultIteratorGetUTF8Text(ri, pageIteratorLevel);
+                if (ptr == null) {
+                    // no text at this position; keep iterating instead of
+                    // failing and dropping every following word
+                    continue;
+                }
+                String text = ptr.getString(0);
+                api.TessDeleteText(ptr);
+                float confidence = api.TessResultIteratorConfidence(ri, pageIteratorLevel);
+                IntBuffer leftB = IntBuffer.allocate(1);
+                IntBuffer topB = IntBuffer.allocate(1);
+                IntBuffer rightB = IntBuffer.allocate(1);
+                IntBuffer bottomB = IntBuffer.allocate(1);
+                api.TessPageIteratorBoundingBox(pi, pageIteratorLevel, leftB, topB, rightB, bottomB);
+                int left = leftB.get();
+                int top = topB.get();
+                int right = rightB.get();
+                int bottom = bottomB.get();
+                Word word = new Word(text, confidence, new Rectangle(left, top, right - left, bottom - top));
+                words.add(word);
+            } while (api.TessPageIteratorNext(pi, pageIteratorLevel) == TRUE);
+
+            return words;
+        } catch (Exception e) {
+            // best-effort: report the failure and return what was collected
+            logger.error(e.getMessage(), e);
+            return words;
+        } finally {
+            dispose();
+        }
+    }
+
+    /**
+     * Releases all of the native resources used by this instance by deleting
+     * the current handle. The <code>api</code> and <code>handle</code> fields
+     * are left as they are; the next {@link #init()} replaces them.
+     */
+    protected void dispose() {
+        api.TessBaseAPIDelete(handle);
+    }
+}
--- a/Tess4J/src/net/sourceforge/tess4j/Tesseract1.java
+++ b/Tess4J/src/net/sourceforge/tess4j/Tesseract1.java
@@ -0,0 +1,647 @@
+/**
+ * Copyright @ 2012 Quan Nguyen
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package net.sourceforge.tess4j;
+
+import com.sun.jna.Pointer;
+import com.sun.jna.StringArray;
+import com.sun.jna.ptr.PointerByReference;
+import java.awt.Rectangle;
+import java.awt.image.*;
+import java.io.*;
+import java.nio.ByteBuffer;
+import java.nio.IntBuffer;
+import java.util.*;
+import javax.imageio.IIOImage;
+import net.sourceforge.lept4j.Box;
+import net.sourceforge.lept4j.Boxa;
+import static net.sourceforge.lept4j.ILeptonica.L_CLONE;
+import net.sourceforge.lept4j.Leptonica1;
+import static net.sourceforge.tess4j.ITessAPI.TRUE;
+
+import net.sourceforge.tess4j.util.ImageIOHelper;
+import net.sourceforge.tess4j.util.LoggHelper;
+import net.sourceforge.tess4j.util.PdfUtilities;
+import org.slf4j.*;
+
+/**
+ * An object layer on top of <code>TessAPI1</code>, provides character
+ * recognition support for common image formats, and multi-page TIFF images
+ * beyond the uncompressed, binary TIFF format supported by Tesseract OCR
+ * engine. The extended capabilities are provided by the
+ * <code>Java Advanced Imaging Image I/O Tools</code>.<br>
+ * <br>
+ * Support for PDF documents is available through <code>Ghost4J</code>, a
+ * <code>JNA</code> wrapper for <code>GPL Ghostscript</code>, which should be
+ * installed and included in system path.<br>
+ * <br>
+ * Any program that uses the library will need to ensure that the required
+ * libraries (the <code>.jar</code> files for <code>jna</code>,
+ * <code>jai-imageio</code>, and <code>ghost4j</code>) are in its compile and
+ * run-time <code>classpath</code>.
+ */
+public class Tesseract1 extends TessAPI1 implements ITesseract {
+
+    private String language = "eng";
+    private String datapath;
+    private RenderedFormat renderedFormat = RenderedFormat.TEXT;
+    private int psm = -1;
+    private int ocrEngineMode = TessOcrEngineMode.OEM_DEFAULT;
+    private final Properties prop = new Properties();
+    private final List<String> configList = new ArrayList<String>();
+
+    private TessBaseAPI handle;
+
+    private static final org.slf4j.Logger logger = LoggerFactory.getLogger(new LoggHelper().toString());
+
+    /** Creates an instance whose data path is <code>TESSDATA_PREFIX</code>, or "./" when that is unavailable. */
+    public Tesseract1() {
+        String prefix = null;
+        try {
+            prefix = System.getenv("TESSDATA_PREFIX");
+        } catch (Exception ignored) {
+            // the environment could not be read; the default below applies
+        }
+
+        datapath = (prefix != null) ? prefix : "./";
+    }
+
+    /**
+     * Returns the native API handle currently stored in this instance.
+     *
+     * @return the <code>handle</code> field as it stands when called
+     */
+    protected TessBaseAPI getHandle() {
+        return handle;
+    }
+
+    /**
+     * Sets the path to <code>tessdata</code> that {@link #init()} passes to the engine.
+     *
+     * @param datapath the tessdata path to set
+     */
+    @Override
+    public void setDatapath(String datapath) {
+        this.datapath = datapath;
+    }
+
+    /**
+     * Sets the language for OCR; the field starts out as <code>"eng"</code>.
+     *
+     * @param language the language code, which follows the ISO 639-3 standard
+     */
+    @Override
+    public void setLanguage(String language) {
+        this.language = language;
+    }
+
+    /**
+     * Sets the OCR engine mode; the field starts out as <code>TessOcrEngineMode.OEM_DEFAULT</code>.
+     *
+     * @param ocrEngineMode the OcrEngineMode to set
+     */
+    @Override
+    public void setOcrEngineMode(int ocrEngineMode) {
+        this.ocrEngineMode = ocrEngineMode;
+    }
+
+    /**
+     * Sets the page segmentation mode. {@link #init()} applies it only when it is greater than -1.
+     *
+     * @param mode the page segmentation mode to set
+     */
+    @Override
+    public void setPageSegMode(int mode) {
+        this.psm = mode;
+    }
+
+    /**
+     * Toggles hOCR output; plain text is rendered when it is switched off.
+     *
+     * @param hocr <code>true</code> to produce hOCR output, <code>false</code> for plain text
+     */
+    public void setHocr(boolean hocr) {
+        prop.setProperty("tessedit_create_hocr", hocr ? "1" : "0");
+        this.renderedFormat = hocr ? RenderedFormat.HOCR : RenderedFormat.TEXT;
+    }
+
+    /**
+     * Stores a value for one of Tesseract's internal parameters. Stored values are
+     * handed to the engine by {@link #setTessVariables()}.
+     *
+     * @param key variable name, e.g., <code>tessedit_create_hocr</code>,
+     * <code>tessedit_char_whitelist</code>, etc.
+     * @param value value for the variable, e.g., "1", "0", "0123456789", etc.
+     */
+    @Override
+    public void setTessVariable(String key, String value) {
+        prop.setProperty(key, value);
+    }
+
+    /**
+     * Replaces the configs passed to Tesseract's <code>Init</code> method.
+     *
+     * @param configs config filenames, e.g., "digits", "bazaar", "quiet"; <code>null</code> empties the list
+     */
+    @Override
+    public void setConfigs(List<String> configs) {
+        configList.clear();
+        if (configs == null) {
+            return;
+        }
+        configList.addAll(configs);
+    }
+
+    /**
+     * Performs OCR on a whole image file by delegating with a <code>null</code> rectangle.
+     *
+     * @param imageFile an image file
+     * @return the recognized text
+     * @throws TesseractException
+     */
+    @Override
+    public String doOCR(File imageFile) throws TesseractException {
+        return doOCR(imageFile, null);
+    }
+
+    /**
+     * Performs OCR on the images read from a file.
+     *
+     * @param imageFile an image file
+     * @param rect the region of the image to be recognized. A rectangle of zero
+     * dimension or <code>null</code> indicates the whole image.
+     * @return the recognized text
+     * @throws TesseractException wrapping any exception raised along the way, after logging it
+     */
+    @Override
+    public String doOCR(File imageFile, Rectangle rect) throws TesseractException {
+        try {
+            List<IIOImage> pages = ImageIOHelper.getIIOImageList(imageFile);
+            return doOCR(pages, imageFile.getPath(), rect);
+        } catch (Exception e) {
+            logger.error(e.getMessage(), e);
+            throw new TesseractException(e);
+        }
+    }
+
+    /**
+     * Performs OCR on a whole buffered image by delegating with a <code>null</code> rectangle.
+     *
+     * @param bi a buffered image
+     * @return the recognized text
+     * @throws TesseractException
+     */
+    @Override
+    public String doOCR(BufferedImage bi) throws TesseractException {
+        return doOCR(bi, null);
+    }
+
+    /**
+     * Performs OCR on a buffered image.
+     *
+     * @param bi a buffered image
+     * @param rect the region of the image to be recognized. A rectangle of zero
+     * dimension or <code>null</code> indicates the whole image.
+     * @return the recognized text
+     * @throws TesseractException wrapping any exception raised along the way, after logging it
+     */
+    @Override
+    public String doOCR(BufferedImage bi, Rectangle rect) throws TesseractException {
+        try {
+            List<IIOImage> pages = ImageIOHelper.getIIOImageList(bi);
+            return doOCR(pages, rect);
+        } catch (Exception e) {
+            logger.error(e.getMessage(), e);
+            throw new TesseractException(e);
+        }
+    }
+
+    /**
+     * Performs OCR operation on a list of images, delegating with a <code>null</code> file name.
+     *
+     * @param imageList a list of <code>IIOImage</code> objects
+     * @param rect the bounding rectangle defines the region of the image to be
+     * recognized. A rectangle of zero dimension or <code>null</code> indicates
+     * the whole image.
+     * @return the recognized text
+     * @throws TesseractException
+     */
+    @Override
+    public String doOCR(List<IIOImage> imageList, Rectangle rect) throws TesseractException {
+        return doOCR(imageList, null, rect);
+    }
+
+    /**
+     * Performs OCR operation on each image in turn and concatenates the results.
+     * An image that raises an <code>IOException</code> is logged and skipped.
+     *
+     * @param imageList a list of <code>IIOImage</code> objects
+     * @param filename input file name
+     * @param rect the bounding rectangle defines the region of the image to be
+     * recognized. A rectangle of zero dimension or <code>null</code> indicates
+     * the whole image.
+     * @return the recognized text
+     * @throws TesseractException
+     */
+    @Override
+    public String doOCR(List<IIOImage> imageList, String filename, Rectangle rect) throws TesseractException {
+        init();
+        setTessVariables();
+
+        try {
+            StringBuilder text = new StringBuilder();
+            int pageNum = 1;
+
+            for (IIOImage page : imageList) {
+                try {
+                    setImage(page.getRenderedImage(), rect);
+                    text.append(getOCRText(filename, pageNum));
+                } catch (IOException ioe) {
+                    // skip the problematic image
+                    logger.error(ioe.getMessage(), ioe);
+                }
+                pageNum++;
+            }
+
+            if (renderedFormat != RenderedFormat.HOCR) {
+                return text.toString();
+            }
+            return text.insert(0, htmlBeginTag).append(htmlEndTag).toString();
+        } finally {
+            dispose();
+        }
+    }
+
+    /**
+     * Performs OCR operation on raw pixel data, delegating with a <code>null</code>
+     * file name. Use <code>SetImage</code>, (optionally) <code>SetRectangle</code>,
+     * and one or more of the <code>Get*Text</code> functions.
+     *
+     * @param xsize width of image
+     * @param ysize height of image
+     * @param buf pixel data
+     * @param rect the bounding rectangle defines the region of the image to be
+     * recognized. A rectangle of zero dimension or <code>null</code> indicates
+     * the whole image.
+     * @param bpp bits per pixel, represents the bit depth of the image, with 1
+     * for binary bitmap, 8 for gray, and 24 for color RGB.
+     * @return the recognized text
+     * @throws TesseractException
+     */
+    @Override
+    public String doOCR(int xsize, int ysize, ByteBuffer buf, Rectangle rect, int bpp) throws TesseractException {
+        return doOCR(xsize, ysize, buf, null, rect, bpp);
+    }
+
+    /**
+     * Performs OCR operation on raw pixel data: initializes the engine, sets the
+     * image (and, optionally, the rectangle), reads the text for page 1 and then
+     * disposes of the engine.
+     *
+     * @param xsize width of image
+     * @param ysize height of image
+     * @param buf pixel data
+     * @param filename input file name. Needed only for training and reading a
+     * UNLV zone file.
+     * @param rect the bounding rectangle defines the region of the image to be
+     * recognized. A rectangle of zero dimension or <code>null</code> indicates
+     * the whole image.
+     * @param bpp bits per pixel, represents the bit depth of the image, with 1
+     * for binary bitmap, 8 for gray, and 24 for color RGB.
+     * @return the recognized text
+     * @throws TesseractException wrapping any exception raised, after it has been logged
+     */
+    @Override
+    public String doOCR(int xsize, int ysize, ByteBuffer buf, String filename, Rectangle rect, int bpp) throws TesseractException {
+        init();
+        setTessVariables();
+
+        try {
+            setImage(xsize, ysize, buf, rect, bpp);
+            return getOCRText(filename, 1);
+        } catch (Exception e) {
+            logger.error(e.getMessage(), e);
+            throw new TesseractException(e);
+        } finally {
+            dispose();
+        }
+    }
+
+    /**
+     * Creates a new engine handle and initializes it with the data path, language, engine mode and configs.
+     */
+    protected void init() {
+        handle = TessBaseAPICreate();
+        StringArray sarray = new StringArray(configList.toArray(new String[0]));
+        PointerByReference configs = new PointerByReference();
+        configs.setPointer(sarray);
+        TessBaseAPIInit1(handle, datapath, language, ocrEngineMode, configs, configList.size());
+        if (psm > -1) { // -1 is the value the psm field starts out with
+            TessBaseAPISetPageSegMode(handle, psm);
+        }
+    }
+
+    /**
+     * Pushes every stored variable to the engine through <code>TessBaseAPISetVariable</code>.
+     */
+    protected void setTessVariables() {
+        for (Enumeration<?> names = prop.propertyNames(); names.hasMoreElements();) {
+            String name = (String) names.nextElement();
+            String value = prop.getProperty(name);
+            TessBaseAPISetVariable(handle, name, value);
+        }
+    }
+
+    /**
+     * Reads width, height, pixel data and pixel size from a rendered image and
+     * passes them to {@link #setImage(int, int, ByteBuffer, Rectangle, int)}.
+     * @param image a rendered image
+     * @param rect region of interest
+     * @throws java.io.IOException
+     */
+    protected void setImage(RenderedImage image, Rectangle rect) throws IOException {
+        setImage(image.getWidth(), image.getHeight(), ImageIOHelper.getImageByteBuffer(image), rect, image
+                .getColorModel().getPixelSize());
+    }
+
+    /**
+     * Hands the pixel data to the engine and, for a non-empty <code>rect</code>, restricts recognition to it.
+     *
+     * @param xsize width of image
+     * @param ysize height of image
+     * @param buf pixel data
+     * @param rect the region to be recognized. A rectangle of zero dimension or
+     * <code>null</code> indicates the whole image.
+     * @param bpp bits per pixel, represents the bit depth of the image, with 1
+     * for binary bitmap, 8 for gray, and 24 for color RGB.
+     */
+    protected void setImage(int xsize, int ysize, ByteBuffer buf, Rectangle rect, int bpp) {
+        int bytesPerPixel = bpp / 8;
+        int bytesPerLine = (int) Math.ceil(xsize * bpp / 8.0);
+        TessBaseAPISetImage(handle, buf, xsize, ysize, bytesPerPixel, bytesPerLine);
+
+        if (rect == null || rect.isEmpty()) {
+            return; // whole image
+        }
+        TessBaseAPISetRectangle(handle, rect.x, rect.y, rect.width, rect.height);
+    }
+
+    /**
+     * Gets recognized text, as hOCR when that format is selected and as UTF-8 text otherwise.
+     *
+     * @param filename input file name; needed only for reading a UNLV zone file
+     * @param pageNum page number; needed for hocr paging
+     * @return the recognized text
+     */
+    protected String getOCRText(String filename, int pageNum) {
+        boolean hasName = filename != null && !filename.isEmpty();
+        if (hasName) {
+            TessBaseAPISetInputName(handle, filename);
+        }
+        boolean hocr = renderedFormat == RenderedFormat.HOCR;
+        Pointer textPtr = hocr ? TessBaseAPIGetHOCRText(handle, pageNum - 1) : TessBaseAPIGetUTF8Text(handle);
+        String text = textPtr.getString(0);
+        TessDeleteText(textPtr);
+        return text;
+    }
+
+    /**
+     * Creates renderers for given formats and links them into one chain.
+     * The first renderer created heads the chain; every later one is attached
+     * to it with <code>TessResultRendererInsert</code>.
+     *
+     * @param outputbase output name handed to each renderer's create function
+     * @param formats the formats to create renderers for, in order
+     * @return the head of the chain, or <code>null</code> if no renderer was created
+     */
+    private TessResultRenderer createRenderers(String outputbase, List<RenderedFormat> formats) {
+        TessResultRenderer head = null;
+
+        for (RenderedFormat format : formats) {
+            TessResultRenderer next;
+
+            switch (format) {
+                case TEXT:
+                    next = TessTextRendererCreate(outputbase);
+                    break;
+                case HOCR:
+                    next = TessHOcrRendererCreate(outputbase);
+                    break;
+                case PDF:
+                    // the PDF renderer additionally takes the engine's data path
+                    String dataPath = TessBaseAPIGetDatapath(handle);
+                    next = TessPDFRendererCreate(outputbase, dataPath);
+                    break;
+                case BOX:
+                    next = TessBoxTextRendererCreate(outputbase);
+                    break;
+                case UNLV:
+                    next = TessUnlvRendererCreate(outputbase);
+                    break;
+                default:
+                    // no renderer for this format: leave the chain untouched
+                    continue;
+            }
+            head = link(head, next);
+        }
+
+        return head;
+    }
+
+    /**
+     * Returns <code>next</code> as the new head when the chain is empty, otherwise inserts it and keeps the head.
+     */
+    private TessResultRenderer link(TessResultRenderer head, TessResultRenderer next) {
+        if (head == null) {
+            return next;
+        }
+        TessResultRendererInsert(head, next);
+        return head;
+    }
+
+    /**
+     * Creates documents for one input file by delegating to the array-based overload.
+     *
+     * @param filename input image
+     * @param outputbase output filename without extension
+     * @param formats types of renderer
+     * @throws TesseractException
+     */
+    @Override
+    public void createDocuments(String filename, String outputbase, List<RenderedFormat> formats) throws TesseractException {
+        createDocuments(new String[]{filename}, new String[]{outputbase}, formats);
+    }
+
+    /**
+     * Creates documents for every input file; a file that fails is logged and skipped.
+     *
+     * @param filenames array of input files
+     * @param outputbases array of output filenames without extension
+     * @param formats types of renderer
+     * @throws TesseractException
+     */
+    @Override
+    public void createDocuments(String[] filenames, String[] outputbases, List<RenderedFormat> formats) throws TesseractException {
+        if (filenames.length != outputbases.length) {
+            throw new RuntimeException("The two arrays must match in length.");
+        }
+
+        init();
+        setTessVariables();
+        try {
+            for (int i = 0; i < filenames.length; i++) {
+                File workingTiffFile = null;
+                TessResultRenderer renderer = null;
+                try {
+                    String filename = filenames[i];
+                    // if PDF, convert to multi-page TIFF
+                    if (filename.toLowerCase().endsWith(".pdf")) {
+                        workingTiffFile = PdfUtilities.convertPdf2Tiff(new File(filename));
+                        filename = workingTiffFile.getPath();
+                    }
+                    renderer = createRenderers(outputbases[i], formats);
+                    createDocuments(filename, renderer);
+                } catch (Exception e) {
+                    logger.error(e.getMessage(), e);
+                } finally {
+                    // always release the renderer, even when processing this file failed
+                    if (renderer != null) {
+                        TessDeleteResultRenderer(renderer);
+                    }
+                    if (workingTiffFile != null && workingTiffFile.exists()) {
+                        workingTiffFile.delete();
+                    }
+                }
+            }
+        } finally {
+            dispose();
+        }
+    }
+
+    /**
+     * Runs <code>TessBaseAPIProcessPages</code> on one input file with the given renderer.
+     *
+     * @param filename input file
+     * @param renderer renderer
+     * @throws TesseractException declared, but never thrown while the check below stays commented out
+     */
+    private void createDocuments(String filename, TessResultRenderer renderer) throws TesseractException {
+        TessBaseAPISetInputName(handle, filename); //for reading a UNLV zone file
+        int result = TessBaseAPIProcessPages(handle, filename, null, 0, renderer);
+
+//        if (result == ITessAPI.FALSE) {
+//            throw new TesseractException("Error during processing page.");
+//        }
+    }
+
+    /**
+     * Gets segmented regions at specified page iterator level.
+     *
+     * @param bi input image
+     * @param pageIteratorLevel TessPageIteratorLevel enum
+     * @return list of <code>Rectangle</code>; empty when the engine returns no component images
+     * @throws TesseractException if preparing the image raises an <code>IOException</code>
+     */
+    @Override
+    public List<Rectangle> getSegmentedRegions(BufferedImage bi, int pageIteratorLevel) throws TesseractException {
+        init();
+        setTessVariables();
+
+        try {
+            List<Rectangle> list = new ArrayList<Rectangle>();
+            setImage(bi, null);
+            Boxa boxes = TessBaseAPIGetComponentImages(handle, pageIteratorLevel, TRUE, null, null);
+            if (boxes == null) {
+                return list; // nothing to read and nothing to destroy
+            }
+            int boxCount = Leptonica1.boxaGetCount(boxes);
+            for (int i = 0; i < boxCount; i++) {
+                Box box = Leptonica1.boxaGetBox(boxes, i, L_CLONE);
+                if (box == null) {
+                    continue;
+                }
+                list.add(new Rectangle(box.x, box.y, box.w, box.h));
+                PointerByReference pRef = new PointerByReference();
+                pRef.setValue(box.getPointer());
+                Leptonica1.boxDestroy(pRef);
+            }
+            PointerByReference pRef = new PointerByReference();
+            pRef.setValue(boxes.getPointer());
+            Leptonica1.boxaDestroy(pRef);
+            return list;
+        } catch (IOException ioe) {
+            // the image could not be prepared: log it and report the failure to the caller
+            logger.error(ioe.getMessage(), ioe);
+            throw new TesseractException(ioe);
+        } finally {
+            dispose();
+        }
+    }
+
+    /**
+     * Gets recognized words at specified page iterator level.
+     *
+     * @param bi input image
+     * @param pageIteratorLevel TessPageIteratorLevel enum
+     * @return list of <code>Word</code>; holds the words gathered so far if an exception interrupts the scan
+     */
+    @Override
+    public List<Word> getWords(BufferedImage bi, int pageIteratorLevel) {
+        this.init();
+        this.setTessVariables();
+        List<Word> words = new ArrayList<Word>();
+
+        try {
+            setImage(bi, null);
+            TessBaseAPIRecognize(handle, null);
+            TessResultIterator ri = TessBaseAPIGetIterator(handle);
+            TessPageIterator pi = TessResultIteratorGetPageIterator(ri);
+            TessPageIteratorBegin(pi);
+
+            do {
+                Pointer ptr = TessResultIteratorGetUTF8Text(ri, pageIteratorLevel);
+                if (ptr == null) {
+                    // no text here; a null pointer used to raise an NPE that ended the scan early
+                    continue;
+                }
+                String text = ptr.getString(0);
+                TessAPI1.TessDeleteText(ptr);
+                float confidence = TessResultIteratorConfidence(ri, pageIteratorLevel);
+                IntBuffer leftB = IntBuffer.allocate(1);
+                IntBuffer topB = IntBuffer.allocate(1);
+                IntBuffer rightB = IntBuffer.allocate(1);
+                IntBuffer bottomB = IntBuffer.allocate(1);
+                TessPageIteratorBoundingBox(pi, pageIteratorLevel, leftB, topB, rightB, bottomB);
+                int left = leftB.get();
+                int top = topB.get();
+                Rectangle box = new Rectangle(left, top, rightB.get() - left, bottomB.get() - top);
+                words.add(new Word(text, confidence, box));
+            } while (TessPageIteratorNext(pi, pageIteratorLevel) == TRUE);
+            return words;
+        } catch (Exception e) {
+            logger.warn(e.getMessage(), e);
+            return words;
+        } finally {
+            dispose();
+        }
+    }
+
+    /**
+     * Releases all of the native resources used by this instance by deleting the engine handle.
+     */
+    protected void dispose() {
+        TessBaseAPIDelete(handle);
+    }
+}
--- a/Tess4J/src/net/sourceforge/tess4j/TesseractException.java
+++ b/Tess4J/src/net/sourceforge/tess4j/TesseractException.java
@@ -0,0 +1,35 @@
+/**
+ * Copyright @ 2010 Quan Nguyen
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package net.sourceforge.tess4j;
+/** Checked exception declared by the OCR methods of this package. */
+public class TesseractException extends Exception {
+    /** Creates an exception with neither a message nor a cause. */
+    public TesseractException() {
+        super();
+    }
+    /** Creates an exception carrying the given message. */
+    public TesseractException(String message) {
+        super(message);
+    }
+    /** Creates an exception wrapping the given cause. */
+    public TesseractException(Throwable cause) {
+        super(cause);
+    }
+    /** Creates an exception carrying the given message and cause. */
+    public TesseractException(String message, Throwable cause) {
+        super(message, cause);
+    }
+}
--- a/Tess4J/src/net/sourceforge/tess4j/Word.java
+++ b/Tess4J/src/net/sourceforge/tess4j/Word.java
@@ -0,0 +1,67 @@
+/**
+ * Copyright @ 2015 Quan Nguyen
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package net.sourceforge.tess4j;
+
+import java.awt.Rectangle;
+
+/**
+ * Holds one piece of recognized text with its confidence and bounding box.
+ */
+public class Word {
+
+    private final String text;
+    private final float confidence;
+    private final Rectangle boundingBox;
+
+    /**
+     * Constructor.
+     *
+     * @param text the recognized text
+     * @param confidence the confidence value for the text
+     * @param boundingBox the rectangle enclosing the text
+     */
+    public Word(String text, float confidence, Rectangle boundingBox) {
+        this.text = text;
+        this.confidence = confidence;
+        this.boundingBox = boundingBox;
+    }
+
+    /**
+     * @return the text
+     */
+    public String getText() {
+        return text;
+    }
+
+    /**
+     * @return the confidence
+     */
+    public float getConfidence() {
+        return confidence;
+    }
+
+    /**
+     * @return the bounding box, the same instance that was passed to the constructor
+     */
+    public Rectangle getBoundingBox() {
+        return boundingBox;
+    }
+
+    @Override
+    public String toString() {
+        return String.format("%s [Confidence: %f Bounding box: %d %d %d %d]", text, confidence, boundingBox.x, boundingBox.y, boundingBox.width, boundingBox.height);
+    }
+}
--- a/Tess4J/src/net/sourceforge/tess4j/util/ImageHelper.java
+++ b/Tess4J/src/net/sourceforge/tess4j/util/ImageHelper.java
@@ -0,0 +1,216 @@
+/**
+ * Copyright @ 2008 Quan Nguyen
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package net.sourceforge.tess4j.util;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.awt.Graphics2D;
+import java.awt.Image;
+import java.awt.RenderingHints;
+import java.awt.Toolkit;
+import java.awt.Transparency;
+import java.awt.datatransfer.Clipboard;
+import java.awt.datatransfer.DataFlavor;
+import java.awt.image.*;
+import javax.imageio.IIOImage;
+
+public class ImageHelper {
+
+    private static final Logger logger = LoggerFactory.getLogger(new LoggHelper().toString());
+
+    /**
+     * Draws the given image, with bicubic interpolation, onto a new image of the
+     * requested size. The result is RGB for an opaque source and ARGB otherwise.
+     *
+     * @param image the original image to be scaled
+     * @param targetWidth the desired width of the scaled instance, in pixels
+     * @param targetHeight the desired height of the scaled instance, in pixels
+     * @return a scaled version of the original {@code BufferedImage}
+     */
+    public static BufferedImage getScaledInstance(BufferedImage image, int targetWidth, int targetHeight) {
+        boolean opaque = image.getTransparency() == Transparency.OPAQUE;
+        int type = opaque ? BufferedImage.TYPE_INT_RGB : BufferedImage.TYPE_INT_ARGB;
+        BufferedImage scaled = new BufferedImage(targetWidth, targetHeight, type);
+        Graphics2D g = scaled.createGraphics();
+        g.setRenderingHint(RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BICUBIC);
+        g.drawImage(image, 0, 0, targetWidth, targetHeight, null);
+        g.dispose();
+        return scaled;
+    }
+
+    /**
+     * Scales the image held by an {@code IIOImage}. A scale within 0.001 of 1
+     * returns the source object itself.
+     *
+     * @param iioSource the original image to be scaled; must hold a {@code BufferedImage}
+     * @param scale the desired scale
+     * @return a scaled version of the original {@code IIOImage}
+     */
+    public static IIOImage getScaledInstance(IIOImage iioSource, float scale) {
+        RenderedImage rendered = iioSource.getRenderedImage();
+        if (!(rendered instanceof BufferedImage)) {
+            throw new IllegalArgumentException("RenderedImage in IIOImage must be BufferedImage");
+        }
+        if (Math.abs(scale - 1.0) < 0.001) {
+            return iioSource;
+        }
+        BufferedImage source = (BufferedImage) rendered;
+        int width = (int) (scale * source.getWidth());
+        int height = (int) (scale * source.getHeight());
+        return new IIOImage(getScaledInstance(source, width, height), null, null);
+    }
+
+    /**
+     * A replacement for the standard <code>BufferedImage.getSubimage</code>
+     * method that draws the region onto a newly allocated image.
+     *
+     * @param image the image to take the region from
+     * @param x the X coordinate of the upper-left corner of the specified
+     * rectangular region
+     * @param y the Y coordinate of the upper-left corner of the specified
+     * rectangular region
+     * @param width the width of the specified rectangular region
+     * @param height the height of the specified rectangular region
+     * @return a BufferedImage that is the subimage of <code>image</code>.
+     */
+    public static BufferedImage getSubImage(BufferedImage image, int x, int y, int width, int height) {
+        boolean opaque = image.getTransparency() == Transparency.OPAQUE;
+        int type = opaque ? BufferedImage.TYPE_INT_RGB : BufferedImage.TYPE_INT_ARGB;
+        BufferedImage copy = new BufferedImage(width, height, type);
+        Graphics2D g = copy.createGraphics();
+        g.drawImage(image.getSubimage(x, y, width, height), 0, 0, null);
+        g.dispose();
+        return copy;
+    }
+
+    /**
+     * Converts an image to binary (B/W) by drawing it onto a <code>TYPE_BYTE_BINARY</code> image.
+     *
+     * @param image input image
+     * @return a monochrome image of the same width and height
+     */
+    public static BufferedImage convertImageToBinary(BufferedImage image) {
+        BufferedImage binary = new BufferedImage(image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_BINARY);
+        Graphics2D g = binary.createGraphics();
+        g.drawImage(image, 0, 0, null);
+        g.dispose();
+        return binary;
+    }
+
+    /**
+     * Converts an image to binary (B/W) by delegating to the renamed method.
+     *
+     * @param image input image
+     * @return a monochrome image
+     * @deprecated As of release 1.1, renamed to
+     * {@link #convertImageToBinary(BufferedImage image)}
+     */
+    @Deprecated
+    public static BufferedImage convertImage2Binary(BufferedImage image) {
+        return convertImageToBinary(image);
+    }
+
+    /**
+     * Converts an image to gray scale by drawing it onto a <code>TYPE_BYTE_GRAY</code> image.
+     *
+     * @param image input image
+     * @return a grayscale image of the same width and height
+     */
+    public static BufferedImage convertImageToGrayscale(BufferedImage image) {
+        BufferedImage gray = new BufferedImage(image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_GRAY);
+        Graphics2D g = gray.createGraphics();
+        g.drawImage(image, 0, 0, null);
+        g.dispose();
+        return gray;
+    }
+
+    private static final short[] invertTable = new short[256];
+
+    static {
+        // entry i holds 255 - i, the complement of an 8-bit component value
+        for (int i = 0; i < invertTable.length; i++) {
+            invertTable[i] = (short) (255 - i);
+        }
+    }
+
+    /**
+     * Inverts image color by running the image through a lookup table of complements.
+     *
+     * @param image input image; NOTE(review): its type is passed to the BufferedImage constructor unchecked - confirm TYPE_CUSTOM never reaches here
+     * @return an inverted-color image
+     */
+    public static BufferedImage invertImageColor(BufferedImage image) {
+        BufferedImage inverted = new BufferedImage(image.getWidth(), image.getHeight(), image.getType());
+        LookupOp invert = new LookupOp(new ShortLookupTable(0, invertTable), null);
+        return invert.filter(image, inverted);
+    }
+
+    /**
+     * Rotates an image about its center onto a canvas sized to hold the rotated bounds.
+     * @param image the original image
+     * @param angle the degree of rotation
+     * @return a rotated image; of type RGB or ARGB when the source type is <code>TYPE_CUSTOM</code>
+     */
+    public static BufferedImage rotateImage(BufferedImage image, double angle) {
+        double theta = Math.toRadians(angle);
+        double sin = Math.abs(Math.sin(theta));
+        double cos = Math.abs(Math.cos(theta));
+        int w = image.getWidth();
+        int h = image.getHeight();
+        int newW = (int) Math.floor(w * cos + h * sin);
+        int newH = (int) Math.floor(h * cos + w * sin);
+        // the BufferedImage(int, int, int) constructor rejects TYPE_CUSTOM, so substitute a standard type
+        int type = image.getType() != BufferedImage.TYPE_CUSTOM ? image.getType()
+                : (image.getTransparency() == Transparency.OPAQUE ? BufferedImage.TYPE_INT_RGB : BufferedImage.TYPE_INT_ARGB);
+        BufferedImage tmp = new BufferedImage(newW, newH, type);
+        Graphics2D g2d = tmp.createGraphics();
+        g2d.setRenderingHint(RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BICUBIC);
+        g2d.translate((newW - w) / 2, (newH - h) / 2);
+        g2d.rotate(theta, w / 2, h / 2);
+        g2d.drawImage(image, 0, 0, null);
+        g2d.dispose();
+        return tmp;
+    }
+
+    /**
+     * Gets an image from Clipboard.
+     * @return the clipboard image, or <code>null</code> when reading it raises an exception
+     */
+    public static Image getClipboardImage() {
+        Clipboard systemClipboard = Toolkit.getDefaultToolkit().getSystemClipboard();
+        try {
+            Object data = systemClipboard.getData(DataFlavor.imageFlavor);
+            return (Image) data;
+        } catch (Exception ignored) {
+            return null;
+        }
+    }
+
+    /**
+     * Clones an image, including one obtained from <code>getSubimage</code>.
+     * http://stackoverflow.com/questions/3514158/how-do-you-clone-a-bufferedimage
+     * @param bi the image to copy
+     * @return a new image with its own copy of the pixel data
+     */
+    public static BufferedImage cloneImage(BufferedImage bi) {
+        ColorModel cm = bi.getColorModel();
+        boolean isAlphaPremultiplied = cm.isAlphaPremultiplied();
+        // copy into a fresh raster at (0, 0): copyData(null) keeps a sub-image's offset, which the constructor rejects
+        WritableRaster raster = bi.copyData(bi.getRaster().createCompatibleWritableRaster(bi.getWidth(), bi.getHeight()));
+        return new BufferedImage(cm, raster, isAlphaPremultiplied, null);
+    }
+}
--- a/Tess4J/src/net/sourceforge/tess4j/util/ImageIOHelper.java
+++ b/Tess4J/src/net/sourceforge/tess4j/util/ImageIOHelper.java
@@ -0,0 +1,642 @@
+/**
+ * Copyright @ 2008 Quan Nguyen
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package net.sourceforge.tess4j.util;
+
+import java.io.*;
+import java.util.*;
+import javax.imageio.*;
+import javax.imageio.stream.*;
+import javax.imageio.metadata.*;
+import java.awt.Toolkit;
+import java.awt.image.*;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+
+import org.w3c.dom.NodeList;
+
+import com.github.jaiimageio.plugins.tiff.*;
+import com.recognition.software.jdeskew.ImageDeskew;
+import com.recognition.software.jdeskew.ImageUtil;
+import org.apache.commons.io.FilenameUtils;
+
+public class ImageIOHelper {
+
+    final static String OUTPUT_FILE_NAME = "Tesstmp";
+    final static String TIFF_EXT = ".tif";
+    final static String TIFF_FORMAT = "tiff";
+    final static String JAI_IMAGE_WRITER_MESSAGE = "Need to install JAI Image I/O package.\nhttps://java.net/projects/jai-imageio/";
+    final static String JAI_IMAGE_READER_MESSAGE = "Unsupported image format. May need to install JAI Image I/O package.\nhttps://java.net/projects/jai-imageio/";
+
+    /**
+     * Converts an image file into one or more temporary TIFF files without
+     * preserving the source compression. A multi-page TIFF is split into
+     * individual TIFF files; images of other formats are converted to TIFF.
+     *
+     * @param imageFile input image file
+     * @param index an index of the page; -1 means all pages, as in a multi-page
+     * TIFF image
+     * @return a list of TIFF image files
+     * @throws IOException
+     */
+    public static List<File> createTiffFiles(File imageFile, int index) throws IOException {
+        final boolean preserveCompression = false;
+        return createTiffFiles(imageFile, index, preserveCompression);
+    }
+
+    /**
+     * Creates a list of TIFF image files from an image file. It basically
+     * converts images of other formats to TIFF format, or a multi-page TIFF
+     * image to multiple TIFF image files. Each page is written to its own
+     * temporary file.
+     *
+     * @param imageFile input image file; its extension selects the reader
+     * @param index an index of the page; -1 means all pages, as in a multi-page
+     * TIFF image
+     * @param preserve preserve compression mode; when <code>false</code>
+     * compression is disabled on the written files
+     * @return a list of TIFF image files
+     * @throws IOException
+     */
+    public static List<File> createTiffFiles(File imageFile, int index, boolean preserve) throws IOException {
+        List<File> tiffFiles = new ArrayList<File>();
+
+        String imageFileName = imageFile.getName();
+        String imageFormat = imageFileName.substring(imageFileName.lastIndexOf('.') + 1);
+
+        Iterator<ImageReader> readers = ImageIO.getImageReadersByFormatName(imageFormat);
+
+        if (!readers.hasNext()) {
+            throw new RuntimeException(JAI_IMAGE_READER_MESSAGE);
+        }
+
+        //Get tif writer; checked before any stream is opened so nothing leaks on failure
+        Iterator<ImageWriter> writers = ImageIO.getImageWritersByFormatName(TIFF_FORMAT);
+
+        if (!writers.hasNext()) {
+            throw new RuntimeException(JAI_IMAGE_WRITER_MESSAGE);
+        }
+
+        ImageReader reader = readers.next();
+        ImageWriter writer = writers.next();
+        ImageInputStream iis = null;
+
+        try {
+            iis = ImageIO.createImageInputStream(imageFile);
+            reader.setInput(iis);
+
+            //Set up the writeParam
+            TIFFImageWriteParam tiffWriteParam = new TIFFImageWriteParam(Locale.US);
+
+            if (!preserve) {
+                tiffWriteParam.setCompressionMode(ImageWriteParam.MODE_DISABLED); // not preserve original sizes; decompress
+            }
+
+            //Get the stream metadata
+            IIOMetadata streamMetadata = writer.getDefaultStreamMetadata(tiffWriteParam);
+
+            int imageTotal = reader.getNumImages(true);
+
+            for (int i = 0; i < imageTotal; i++) {
+                // all if index == -1; otherwise, only index-th
+                if (index == -1 || i == index) {
+                    IIOImage oimage = reader.readAll(i, reader.getDefaultReadParam());
+                    File tiffFile = File.createTempFile(OUTPUT_FILE_NAME, TIFF_EXT);
+                    ImageOutputStream ios = ImageIO.createImageOutputStream(tiffFile);
+                    try {
+                        writer.setOutput(ios);
+                        writer.write(streamMetadata, oimage, tiffWriteParam);
+                    } finally {
+                        // close the page's stream even when the write fails
+                        if (ios != null) {
+                            ios.close();
+                        }
+                    }
+                    tiffFiles.add(tiffFile);
+                }
+            }
+        } finally {
+            // release codec resources and the input stream on every exit path
+            writer.dispose();
+            reader.dispose();
+            if (iis != null) {
+                iis.close();
+            }
+        }
+
+        return tiffFiles;
+    }
+
+    /**
+     * Writes <code>IIOImage</code> objects out as temporary TIFF files,
+     * passing 0 for both resolution arguments of the four-argument overload.
+     *
+     * @param imageList a list of <code>IIOImage</code> objects
+     * @param index an index of the page; -1 means all pages
+     * @return a list of TIFF image files
+     * @throws IOException
+     */
+    public static List<File> createTiffFiles(List<IIOImage> imageList, int index) throws IOException {
+        final int unspecifiedDpi = 0;
+        return createTiffFiles(imageList, index, unspecifiedDpi, unspecifiedDpi);
+    }
+
+    /**
+     * Creates a list of uncompressed TIFF image files from a list of
+     * <code>IIOImage</code> objects, optionally stamping a resolution on each
+     * page. Each page is written to its own temporary file.
+     *
+     * @param imageList a list of <code>IIOImage</code> objects
+     * @param index an index of the page; -1 means all pages
+     * @param dpiX horizontal resolution; applied only when both
+     * <code>dpiX</code> and <code>dpiY</code> are non-zero
+     * @param dpiY vertical resolution; applied only when both
+     * <code>dpiX</code> and <code>dpiY</code> are non-zero
+     * @return a list of TIFF image files
+     * @throws IOException
+     */
+    public static List<File> createTiffFiles(List<IIOImage> imageList, int index, int dpiX, int dpiY) throws IOException {
+        List<File> tiffFiles = new ArrayList<File>();
+
+        //Set up the writeParam
+        TIFFImageWriteParam tiffWriteParam = new TIFFImageWriteParam(Locale.US);
+        tiffWriteParam.setCompressionMode(ImageWriteParam.MODE_DISABLED);
+
+        //Get tif writer and set output to file
+        Iterator<ImageWriter> writers = ImageIO.getImageWritersByFormatName(TIFF_FORMAT);
+
+        if (!writers.hasNext()) {
+            throw new RuntimeException(JAI_IMAGE_WRITER_MESSAGE);
+        }
+
+        ImageWriter writer = writers.next();
+
+        try {
+            //Get the stream metadata
+            IIOMetadata streamMetadata = writer.getDefaultStreamMetadata(tiffWriteParam);
+
+            // all if index == -1; otherwise, only index-th
+            for (IIOImage oimage : (index == -1 ? imageList : imageList.subList(index, index + 1))) {
+                if (dpiX != 0 && dpiY != 0) {
+                    // Replace the image's metadata with default metadata carrying the requested DPI.
+                    ImageTypeSpecifier imageType = ImageTypeSpecifier.createFromRenderedImage(oimage.getRenderedImage());
+                    IIOMetadata imageMetadata = writer.getDefaultImageMetadata(imageType, null);
+                    imageMetadata = setDPIViaAPI(imageMetadata, dpiX, dpiY);
+                    oimage.setMetadata(imageMetadata);
+                }
+
+                File tiffFile = File.createTempFile(OUTPUT_FILE_NAME, TIFF_EXT);
+                ImageOutputStream ios = ImageIO.createImageOutputStream(tiffFile);
+                try {
+                    writer.setOutput(ios);
+                    writer.write(streamMetadata, oimage, tiffWriteParam);
+                } finally {
+                    // close the page's stream even when the write fails
+                    if (ios != null) {
+                        ios.close();
+                    }
+                }
+                tiffFiles.add(tiffFile);
+            }
+        } finally {
+            // always release the writer, including on failure
+            writer.dispose();
+        }
+
+        return tiffFiles;
+    }
+
+    /**
+     * Set DPI using API.
+     *
+     * @param imageMetadata original IIOMetadata
+     * @param dpiX horizontal resolution
+     * @param dpiY vertical resolution
+     * @return modified IIOMetadata
+     * @throws IIOInvalidTreeException
+     */
+    private static IIOMetadata setDPIViaAPI(IIOMetadata imageMetadata, int dpiX, int dpiY)
+            throws IIOInvalidTreeException {
+        // Derive the TIFFDirectory from the metadata.
+        TIFFDirectory dir = TIFFDirectory.createFromMetadata(imageMetadata);
+
+        // Get {X,Y}Resolution tags.
+        BaselineTIFFTagSet base = BaselineTIFFTagSet.getInstance();
+        TIFFTag tagXRes = base.getTag(BaselineTIFFTagSet.TAG_X_RESOLUTION);
+        TIFFTag tagYRes = base.getTag(BaselineTIFFTagSet.TAG_Y_RESOLUTION);
+
+        // Create {X,Y}Resolution fields.
+        TIFFField fieldXRes = new TIFFField(tagXRes, TIFFTag.TIFF_RATIONAL,
+                1, new long[][]{{dpiX, 1}});
+        TIFFField fieldYRes = new TIFFField(tagYRes, TIFFTag.TIFF_RATIONAL,
+                1, new long[][]{{dpiY, 1}});
+
+        // Append {X,Y}Resolution fields to directory.
+        dir.addTIFFField(fieldXRes);
+        dir.addTIFFField(fieldYRes);
+
+        // Convert to metadata object.
+        IIOMetadata metadata = dir.getAsMetadata();
+
+        // Add other metadata.
+        IIOMetadataNode root = new IIOMetadataNode("javax_imageio_1.0");
+        IIOMetadataNode horiz = new IIOMetadataNode("HorizontalPixelSize");
+        horiz.setAttribute("value", Double.toString(25.4f / dpiX));
+        IIOMetadataNode vert = new IIOMetadataNode("VerticalPixelSize");
+        vert.setAttribute("value", Double.toString(25.4f / dpiY));
+        IIOMetadataNode dim = new IIOMetadataNode("Dimension");
+        dim.appendChild(horiz);
+        dim.appendChild(vert);
+        root.appendChild(dim);
+        metadata.mergeTree("javax_imageio_1.0", root);
+
+        return metadata;
+    }
+
+    /**
+     * Gets pixel data of an <code>IIOImage</code> object.
+     *
+     * @param image an <code>IIOImage</code> object
+     * @return a byte buffer of pixel data
+     * @throws IOException
+     */
+    public static ByteBuffer getImageByteBuffer(IIOImage image) throws IOException {
+        return getImageByteBuffer(image.getRenderedImage());
+    }
+
+    /**
+     * Extracts the pixel data of a <code>RenderedImage</code>. The image is
+     * encoded as an uncompressed TIFF in memory, read back as a
+     * <code>BufferedImage</code>, and then converted to a byte buffer.
+     *
+     * @param image an <code>RenderedImage</code> object
+     * @return a byte buffer of pixel data
+     * @throws IOException
+     */
+    public static ByteBuffer getImageByteBuffer(RenderedImage image) throws IOException {
+        // Uncompressed TIFF output
+        TIFFImageWriteParam writeParam = new TIFFImageWriteParam(Locale.US);
+        writeParam.setCompressionMode(ImageWriteParam.MODE_DISABLED);
+
+        Iterator<ImageWriter> tiffWriters = ImageIO.getImageWritersByFormatName(TIFF_FORMAT);
+        if (!tiffWriters.hasNext()) {
+            throw new RuntimeException(JAI_IMAGE_WRITER_MESSAGE);
+        }
+        ImageWriter tiffWriter = tiffWriters.next();
+
+        IIOMetadata defaultStreamMetadata = tiffWriter.getDefaultStreamMetadata(writeParam);
+
+        // Encode into memory rather than onto disk.
+        ByteArrayOutputStream encoded = new ByteArrayOutputStream();
+        ImageOutputStream stream = ImageIO.createImageOutputStream(encoded);
+        tiffWriter.setOutput(stream);
+        IIOImage wrapped = new IIOImage(image, null, null);
+        tiffWriter.write(defaultStreamMetadata, wrapped, writeParam);
+        tiffWriter.dispose();
+
+        // Rewind and decode what was just written.
+        stream.seek(0);
+        BufferedImage decoded = ImageIO.read(stream);
+        return convertImageData(decoded);
+    }
+
+    /**
+     * Copies the raster bytes of a <code>BufferedImage</code> into a direct
+     * <code>ByteBuffer</code> in native byte order. Images whose raster is
+     * not byte-backed are converted to grayscale first.
+     *
+     * @param bi Input image
+     * @return pixel data
+     */
+    public static ByteBuffer convertImageData(BufferedImage bi) {
+        DataBuffer dataBuffer = bi.getRaster().getDataBuffer();
+        boolean byteBacked = dataBuffer instanceof DataBufferByte;
+        if (!byteBacked) {
+            // Raster data is not necessarily bytes; the cast below would fail,
+            // so go through a grayscale conversion first.
+            bi = ImageHelper.convertImageToGrayscale(bi);
+            dataBuffer = bi.getRaster().getDataBuffer();
+        }
+
+        byte[] pixels = ((DataBufferByte) dataBuffer).getData();
+        ByteBuffer direct = ByteBuffer.allocateDirect(pixels.length);
+        direct.order(ByteOrder.nativeOrder());
+        direct.put(pixels);
+        direct.flip();
+        return direct;
+    }
+
+    /**
+     * Reads every page of an image file into <code>BufferedImage</code>
+     * objects. The reader is chosen from the file name's extension.
+     *
+     * @param imageFile input image file. It can be any of the supported
+     * formats, including TIFF, JPEG, GIF, PNG, BMP
+     * @return a list of <code>BufferedImage</code> objects
+     * @throws IOException
+     */
+    public static List<BufferedImage> getImageList(File imageFile) throws IOException {
+        ImageInputStream input = null;
+        ImageReader imageReader = null;
+
+        try {
+            List<BufferedImage> pages = new ArrayList<BufferedImage>();
+
+            String name = imageFile.getName();
+            String extension = name.substring(name.lastIndexOf('.') + 1);
+            Iterator<ImageReader> candidates = ImageIO.getImageReadersByFormatName(extension);
+            if (!candidates.hasNext()) {
+                throw new RuntimeException(JAI_IMAGE_READER_MESSAGE);
+            }
+            imageReader = candidates.next();
+
+            input = ImageIO.createImageInputStream(imageFile);
+            imageReader.setInput(input);
+
+            int pageCount = imageReader.getNumImages(true);
+            int page = 0;
+            while (page < pageCount) {
+                pages.add(imageReader.read(page));
+                page++;
+            }
+
+            return pages;
+        } finally {
+            try {
+                if (input != null) {
+                    input.close();
+                }
+                if (imageReader != null) {
+                    imageReader.dispose();
+                }
+            } catch (Exception e) {
+                // ignore
+            }
+        }
+    }
+
+    /**
+     * Gets a list of <code>IIOImage</code> objects for an image file.
+     *
+     * @param imageFile input image file. It can be any of the supported
+     * formats, including TIFF, JPEG, GIF, PNG, BMP, JPEG, and PDF if GPL
+     * Ghostscript is installed
+     * @return a list of <code>IIOImage</code> objects
+     * @throws IOException
+     */
+    public static List<IIOImage> getIIOImageList(File imageFile) throws IOException {
+        File workingTiffFile = null;
+
+        ImageReader reader = null;
+        ImageInputStream iis = null;
+
+        try {
+            // convert PDF to TIFF
+            if (imageFile.getName().toLowerCase().endsWith(".pdf")) {
+                workingTiffFile = PdfUtilities.convertPdf2Tiff(imageFile);
+                imageFile = workingTiffFile;
+            }
+
+            List<IIOImage> iioImageList = new ArrayList<IIOImage>();
+
+            String imageFileName = imageFile.getName();
+            String imageFormat = imageFileName.substring(imageFileName.lastIndexOf('.') + 1);
+            if (imageFormat.matches("(pbm|pgm|ppm)")) {
+                imageFormat = "pnm";
+            } else if (imageFormat.matches("(jp2|j2k|jpf|jpx|jpm)")) {
+                imageFormat = "jpeg2000";
+            }
+            Iterator<ImageReader> readers = ImageIO.getImageReadersByFormatName(imageFormat);
+
+            if (!readers.hasNext()) {
+                throw new RuntimeException(JAI_IMAGE_READER_MESSAGE);
+            }
+
+            reader = readers.next();
+            iis = ImageIO.createImageInputStream(imageFile);
+            reader.setInput(iis);
+
+            int imageTotal = reader.getNumImages(true);
+
+            for (int i = 0; i < imageTotal; i++) {
+//                IIOImage oimage = new IIOImage(reader.read(i), null, reader.getImageMetadata(i));
+                IIOImage oimage = reader.readAll(i, reader.getDefaultReadParam());
+                iioImageList.add(oimage);
+            }
+
+            return iioImageList;
+        } finally {
+            try {
+                if (iis != null) {
+                    iis.close();
+                }
+                if (reader != null) {
+                    reader.dispose();
+                }
+            } catch (Exception e) {
+                // ignore
+            }
+            if (workingTiffFile != null && workingTiffFile.exists()) {
+                workingTiffFile.delete();
+            }
+        }
+    }
+
+    /**
+     * Wraps a single <code>BufferedImage</code> in a one-element list of
+     * <code>IIOImage</code> objects. The wrapper carries no thumbnails and no
+     * metadata.
+     *
+     * @param bi input image
+     * @return a list of <code>IIOImage</code> objects
+     * @throws IOException
+     */
+    public static List<IIOImage> getIIOImageList(BufferedImage bi) throws IOException {
+        IIOImage wrapped = new IIOImage(bi, null, null);
+        List<IIOImage> singlePage = new ArrayList<IIOImage>();
+        singlePage.add(wrapped);
+        return singlePage;
+    }
+
+    /**
+     * Merges multiple images into one multi-page TIFF image. Every page of
+     * every input file is appended, in order, to the output file. Does
+     * nothing when the input array is empty.
+     *
+     * @param inputImages an array of image files
+     * @param outputTiff the output multi-page TIFF file
+     * @throws IOException
+     */
+    public static void mergeTiff(File[] inputImages, File outputTiff) throws IOException {
+        if (inputImages.length == 0) {
+            // if no image
+            return;
+        }
+
+        Iterator<ImageWriter> writers = ImageIO.getImageWritersByFormatName(TIFF_FORMAT);
+
+        if (!writers.hasNext()) {
+            throw new RuntimeException(JAI_IMAGE_WRITER_MESSAGE);
+        }
+
+        ImageWriter writer = writers.next();
+        ImageOutputStream ios = null;
+
+        try {
+            //Set up the writeParam; compression mode left untouched to preserve original sizes
+            TIFFImageWriteParam tiffWriteParam = new TIFFImageWriteParam(Locale.US);
+
+            //Get the stream metadata
+            IIOMetadata streamMetadata = writer.getDefaultStreamMetadata(tiffWriteParam);
+
+            ios = ImageIO.createImageOutputStream(outputTiff);
+            writer.setOutput(ios);
+
+            // The first page starts the file; later pages are inserted at increasing indices.
+            boolean firstPage = true;
+            int index = 1;
+            for (File inputImage : inputImages) {
+                List<IIOImage> iioImages = getIIOImageList(inputImage);
+                for (IIOImage iioImage : iioImages) {
+                    if (firstPage) {
+                        writer.write(streamMetadata, iioImage, tiffWriteParam);
+                        firstPage = false;
+                    } else {
+                        writer.writeInsert(index++, iioImage, tiffWriteParam);
+                    }
+                }
+            }
+        } finally {
+            // close the output and release the writer on every exit path
+            try {
+                if (ios != null) {
+                    ios.close();
+                }
+            } finally {
+                writer.dispose();
+            }
+        }
+    }
+
+    /**
+     * Merges in-memory images into one multi-page TIFF image without
+     * requesting an explicit compression type.
+     *
+     * @param inputImages an array of <code>BufferedImage</code>
+     * @param outputTiff the output TIFF file
+     * @throws IOException
+     */
+    public static void mergeTiff(BufferedImage[] inputImages, File outputTiff) throws IOException {
+        final String noExplicitCompression = null;
+        mergeTiff(inputImages, outputTiff, noExplicitCompression);
+    }
+
+    /**
+     * Merges multiple images into one multi-page TIFF image.
+     *
+     * @param inputImages an array of <code>BufferedImage</code>
+     * @param outputTiff the output TIFF file
+     * @param compressionType valid values: LZW, CCITT T.6, PackBits
+     * @throws IOException
+     */
+    public static void mergeTiff(BufferedImage[] inputImages, File outputTiff, String compressionType) throws IOException {
+        List<IIOImage> imageList = new ArrayList<IIOImage>();
+
+        for (BufferedImage inputImage : inputImages) {
+            imageList.add(new IIOImage(inputImage, null, null));
+        }
+
+        mergeTiff(imageList, outputTiff, compressionType);
+    }
+
+    /**
+     * Merges <code>IIOImage</code> objects into one multi-page TIFF image
+     * without requesting an explicit compression type.
+     *
+     * @param imageList a list of <code>IIOImage</code> objects
+     * @param outputTiff the output TIFF file
+     * @throws IOException
+     */
+    public static void mergeTiff(List<IIOImage> imageList, File outputTiff) throws IOException {
+        final String noExplicitCompression = null;
+        mergeTiff(imageList, outputTiff, noExplicitCompression);
+    }
+
+    /**
+     * Merges multiple images into one multi-page TIFF image. Each image's
+     * metadata is replaced with default metadata carrying a 300 DPI
+     * resolution before it is written. The list itself is not modified. Does
+     * nothing when the list is <code>null</code> or empty.
+     *
+     * @param imageList a list of <code>IIOImage</code> objects
+     * @param outputTiff the output TIFF file
+     * @param compressionType valid values: LZW, CCITT T.6, PackBits;
+     * <code>null</code> leaves the writer's compression settings untouched
+     * @throws IOException
+     */
+    public static void mergeTiff(List<IIOImage> imageList, File outputTiff, String compressionType) throws IOException {
+        if (imageList == null || imageList.isEmpty()) {
+            // if no image
+            return;
+        }
+
+        Iterator<ImageWriter> writers = ImageIO.getImageWritersByFormatName(TIFF_FORMAT);
+        if (!writers.hasNext()) {
+            throw new RuntimeException(JAI_IMAGE_WRITER_MESSAGE);
+        }
+
+        ImageWriter writer = writers.next();
+        ImageOutputStream ios = null;
+
+        try {
+            //Set up the writeParam
+            TIFFImageWriteParam tiffWriteParam = new TIFFImageWriteParam(Locale.US);
+            if (compressionType != null) {
+                tiffWriteParam.setCompressionMode(ImageWriteParam.MODE_EXPLICIT);
+                tiffWriteParam.setCompressionType(compressionType);
+            }
+
+            //Get the stream metadata
+            IIOMetadata streamMetadata = writer.getDefaultStreamMetadata(tiffWriteParam);
+
+            ios = ImageIO.createImageOutputStream(outputTiff);
+            writer.setOutput(ios);
+
+            int dpiX = 300;
+            int dpiY = 300;
+
+            for (IIOImage iioImage : imageList) {
+                // Get the default image metadata.
+                ImageTypeSpecifier imageType = ImageTypeSpecifier.createFromRenderedImage(iioImage.getRenderedImage());
+                IIOMetadata imageMetadata = writer.getDefaultImageMetadata(imageType, null);
+                imageMetadata = setDPIViaAPI(imageMetadata, dpiX, dpiY);
+                iioImage.setMetadata(imageMetadata);
+            }
+
+            // Write the first page normally and insert the rest, without
+            // removing anything from the caller's list (which may also be
+            // unmodifiable).
+            boolean firstPage = true;
+            int insertIndex = 1;
+            for (IIOImage iioImage : imageList) {
+                if (firstPage) {
+                    writer.write(streamMetadata, iioImage, tiffWriteParam);
+                    firstPage = false;
+                } else {
+                    writer.writeInsert(insertIndex++, iioImage, tiffWriteParam);
+                }
+            }
+        } finally {
+            // close the output and release the writer on every exit path
+            try {
+                if (ios != null) {
+                    ios.close();
+                }
+            } finally {
+                writer.dispose();
+            }
+        }
+    }
+
+    /**
+     * Deskews image. Every page whose detected skew angle exceeds the
+     * threshold in either direction is rotated back by that angle; all pages
+     * are then merged into a temporary multi-page TIFF file.
+     *
+     * @param imageFile input image
+     * @param minimumDeskewThreshold minimum deskew threshold (typically, 0.05d)
+     * @return temporary multi-page TIFF image file
+     * @throws IOException
+     */
+    public static File deskewImage(File imageFile, double minimumDeskewThreshold) throws IOException {
+        List<BufferedImage> imageList = getImageList(imageFile);
+        for (int i = 0; i < imageList.size(); i++) {
+            BufferedImage bi = imageList.get(i);
+            ImageDeskew deskew = new ImageDeskew(bi);
+            double imageSkewAngle = deskew.getSkewAngle();
+
+            if (Math.abs(imageSkewAngle) > minimumDeskewThreshold) {
+                bi = ImageUtil.rotate(bi, -imageSkewAngle, bi.getWidth() / 2, bi.getHeight() / 2);
+                imageList.set(i, bi); // replace original with deskewed image
+            }
+        }
+
+        // File.createTempFile rejects prefixes shorter than three characters,
+        // so pad short base names (e.g. "a.png") instead of failing.
+        String prefix = FilenameUtils.getBaseName(imageFile.getName());
+        while (prefix.length() < 3) {
+            prefix += "_";
+        }
+
+        File tempImageFile = File.createTempFile(prefix, ".tif");
+        ImageIOHelper.mergeTiff(imageList.toArray(new BufferedImage[0]), tempImageFile);
+
+        return tempImageFile;
+    }
+
+    /**
+     * Reads resolution information from an image's metadata.
+     *
+     * @param oimage the image whose metadata is inspected
+     * @return a map with the keys <code>dpiX</code> and <code>dpiY</code>;
+     * empty when the image carries no metadata
+     */
+    public static Map<String, String> readImageData(IIOImage oimage) {
+        Map<String, String> result = new HashMap<String, String>();
+
+        IIOMetadata metadata = oimage.getMetadata();
+        if (metadata == null) {
+            return result;
+        }
+
+        IIOMetadataNode tree = (IIOMetadataNode) metadata.getAsTree("javax_imageio_1.0");
+        result.put("dpiX", String.valueOf(pixelSizeToDpi(tree, "HorizontalPixelSize")));
+        result.put("dpiY", String.valueOf(pixelSizeToDpi(tree, "VerticalPixelSize")));
+
+        return result;
+    }
+
+    /**
+     * Derives a DPI value from the first element with the given tag name,
+     * computed as 25.4 divided by that element's first attribute value. Uses
+     * the screen resolution when no such element exists.
+     */
+    private static int pixelSizeToDpi(IIOMetadataNode tree, String tagName) {
+        NodeList matches = tree.getElementsByTagName(tagName);
+        if (matches.getLength() == 0) {
+            return Toolkit.getDefaultToolkit().getScreenResolution();
+        }
+        float pixelSize = Float.parseFloat(matches.item(0).getAttributes().item(0).getNodeValue());
+        return (int) Math.round(25.4f / pixelSize);
+    }
+}
--- a/Tess4J/src/net/sourceforge/tess4j/util/LoadLibs.java
+++ b/Tess4J/src/net/sourceforge/tess4j/util/LoadLibs.java
@@ -0,0 +1,232 @@
+/**
+ * Copyright @ 2014 Quan Nguyen
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package net.sourceforge.tess4j.util;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.net.JarURLConnection;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.net.URLConnection;
+import java.util.Enumeration;
+import java.util.jar.JarEntry;
+import java.util.jar.JarFile;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.IOUtils;
+import org.jboss.vfs.VFS;
+import org.jboss.vfs.VirtualFile;
+import org.slf4j.LoggerFactory;
+
+import com.sun.jna.Native;
+import com.sun.jna.Platform;
+
+import net.sourceforge.tess4j.TessAPI;
+
+/**
+ * Loads native libraries from JAR or project folder.
+ *
+ * @author O.J. Sousa Rodrigues
+ * @author Quan Nguyen
+ */
+public class LoadLibs {
+
+    private static final String VFS_PROTOCOL = "vfs";
+    private static final String JNA_LIBRARY_PATH = "jna.library.path";
+    public static final String TESS4J_TEMP_DIR = new File(System.getProperty("java.io.tmpdir"), "tess4j").getPath();
+
+    /**
+     * Native library name.
+     */
+    public static final String LIB_NAME = "libtesseract3051";
+    public static final String LIB_NAME_NON_WIN = "tesseract";
+
+    private static final org.slf4j.Logger logger = LoggerFactory.getLogger(new LoggHelper().toString());
+
+    // On class load: extract the platform's native resources and make sure
+    // the extraction folder is part of jna.library.path, after any path
+    // already configured there.
+    static {
+        System.setProperty("jna.encoding", "UTF8");
+        File extracted = extractTessResources(Platform.RESOURCE_PREFIX);
+        boolean usable = extracted != null && extracted.exists();
+        if (usable) {
+            String configured = System.getProperty(JNA_LIBRARY_PATH);
+            String extractedPath = extracted.getPath();
+            String libraryPath;
+            if (configured != null && !configured.isEmpty()) {
+                libraryPath = configured + File.pathSeparator + extractedPath;
+            } else {
+                libraryPath = extractedPath;
+            }
+            System.setProperty(JNA_LIBRARY_PATH, libraryPath);
+        }
+    }
+
+    /**
+     * Loads Tesseract library via JNA.
+     *
+     * @return TessAPI instance being loaded using
+     * <code>Native.loadLibrary()</code>.
+     */
+    public static TessAPI getTessAPIInstance() {
+        return (TessAPI) Native.loadLibrary(getTesseractLibName(), TessAPI.class);
+    }
+
+    /**
+     * Gets the native library name for the current platform:
+     * {@link #LIB_NAME} on Windows, {@link #LIB_NAME_NON_WIN} elsewhere.
+     *
+     * @return the name of the tesseract library to be loaded using the
+     * <code>Native.register()</code>.
+     */
+    public static String getTesseractLibName() {
+        if (Platform.isWindows()) {
+            return LIB_NAME;
+        }
+        return LIB_NAME_NON_WIN;
+    }
+
+    /**
+     * Extracts tesseract resources to temp folder. Every class-path resource
+     * with the given name is copied below {@link #TESS4J_TEMP_DIR}; copy
+     * failures are logged as warnings rather than thrown.
+     *
+     * @param resourceName name of file or directory
+     * @return target path, which could be file or directory
+     */
+    public static synchronized File extractTessResources(String resourceName) {
+        File destination = new File(TESS4J_TEMP_DIR, resourceName);
+
+        try {
+            ClassLoader loader = LoadLibs.class.getClassLoader();
+            for (Enumeration<URL> found = loader.getResources(resourceName); found.hasMoreElements();) {
+                copyResources(found.nextElement(), destination);
+            }
+        } catch (IOException | URISyntaxException e) {
+            logger.warn(e.getMessage(), e);
+        }
+
+        return destination;
+    }
+
+    /**
+     * Copies resources to target folder. The source may live inside a jar,
+     * behind a <code>vfs</code> URL, or on the local file system. Existing
+     * target files are overwritten only when their length differs from the
+     * source.
+     *
+     * @param resourceUrl location of the resource; <code>null</code> is ignored
+     * @param targetPath destination file or directory
+     * @throws IOException if copying fails
+     * @throws URISyntaxException if a <code>vfs</code> URL cannot be converted
+     * to a URI
+     */
+    static void copyResources(URL resourceUrl, File targetPath) throws IOException, URISyntaxException {
+        if (resourceUrl == null) {
+            return;
+        }
+
+        URLConnection urlConnection = resourceUrl.openConnection();
+
+        /**
+         * Copy resources either from inside jar or from project folder.
+         */
+        if (urlConnection instanceof JarURLConnection) {
+            copyJarResourceToPath((JarURLConnection) urlConnection, targetPath);
+        } else if (VFS_PROTOCOL.equals(resourceUrl.getProtocol())) {
+            VirtualFile virtualFileOrFolder = VFS.getChild(resourceUrl.toURI());
+            copyFromWarToFolder(virtualFileOrFolder, targetPath);
+        } else {
+            File file = toLocalFile(resourceUrl);
+            if (file.isDirectory()) {
+                for (File resourceFile : FileUtils.listFiles(file, null, true)) {
+                    // Keep the part of the source path that follows the target folder's name.
+                    int index = resourceFile.getPath().lastIndexOf(targetPath.getName()) + targetPath.getName().length();
+                    File targetFile = new File(targetPath, resourceFile.getPath().substring(index));
+                    if (!targetFile.exists() || targetFile.length() != resourceFile.length()) {
+                        if (resourceFile.isFile()) {
+                            FileUtils.copyFile(resourceFile, targetFile);
+                        }
+                    }
+                }
+            } else {
+                if (!targetPath.exists() || targetPath.length() != file.length()) {
+                    FileUtils.copyFile(file, targetPath);
+                }
+            }
+        }
+    }
+
+    /**
+     * Maps a resource URL to a local file. URL paths are percent-encoded
+     * (a space appears as <code>%20</code>), so <code>file:</code> URLs are
+     * decoded through their URI form; the raw path is used as a fallback.
+     */
+    private static File toLocalFile(URL url) {
+        if ("file".equals(url.getProtocol())) {
+            try {
+                return new File(url.toURI());
+            } catch (URISyntaxException | IllegalArgumentException e) {
+                // not a well-formed hierarchical file URI; fall back to the raw path below
+            }
+        }
+        return new File(url.getPath());
+    }
+
+    /**
+     * Extracts the entries below the connection's entry name from its jar
+     * file into the destination path. Directories are created as needed;
+     * a file is written only when the target is missing or its length differs
+     * from the jar entry's size. An <code>IOException</code> is logged as a
+     * warning and not rethrown.
+     *
+     * @param jarConnection connection whose jar file and entry name select
+     * what is extracted
+     * @param destPath destination file or directory
+     */
+    static void copyJarResourceToPath(JarURLConnection jarConnection, File destPath) {
+        // NOTE(review): try-with-resources closes the JarFile returned by the
+        // connection; confirm this instance is not shared with other users.
+        try (JarFile jarFile = jarConnection.getJarFile()) {
+            String jarConnectionEntryName = jarConnection.getEntryName();
+            // Treat the entry name as a folder prefix.
+            if (!jarConnectionEntryName.endsWith("/")) {
+                jarConnectionEntryName += "/";
+            }
+
+            /**
+             * Iterate all entries in the jar file.
+             */
+            for (Enumeration<JarEntry> e = jarFile.entries(); e.hasMoreElements();) {
+                JarEntry jarEntry = e.nextElement();
+                String jarEntryName = jarEntry.getName();
+
+                /**
+                 * Extract files only if they match the path.
+                 */
+                if (jarEntryName.startsWith(jarConnectionEntryName)) {
+                    // Path of the entry relative to the requested folder.
+                    String filename = jarEntryName.substring(jarConnectionEntryName.length());
+                    File targetFile = new File(destPath, filename);
+
+                    if (jarEntry.isDirectory()) {
+                        targetFile.mkdirs();
+                    } else {
+                        // Skip files that already exist with the same size.
+                        if (!targetFile.exists() || targetFile.length() != jarEntry.getSize()) {
+                            try (InputStream is = jarFile.getInputStream(jarEntry);
+                                    OutputStream out = FileUtils.openOutputStream(targetFile)) {
+                                IOUtils.copy(is, out);
+                            }
+                        }
+                    }
+                }
+            }
+        } catch (IOException e) {
+            logger.warn(e.getMessage(), e);
+        }
+    }
+
+    /**
+     * Copies resources from WAR to target folder, recursing into folders.
+     * An item is handled as a folder only when it is a directory and its name
+     * contains no dot; everything else is copied as a file. A file is copied
+     * only when the target is missing or its length differs from the
+     * source's size.
+     *
+     * @param virtualFileOrFolder source file or folder
+     * @param targetFolder folder that receives the copied content
+     * @throws IOException
+     */
+    static void copyFromWarToFolder(VirtualFile virtualFileOrFolder, File targetFolder) throws IOException {
+        if (virtualFileOrFolder.isDirectory() && !virtualFileOrFolder.getName().contains(".")) {
+            if (targetFolder.getName().equalsIgnoreCase(virtualFileOrFolder.getName())) {
+                // Same folder name as the target: copy the children straight into it.
+                for (VirtualFile innerFileOrFolder : virtualFileOrFolder.getChildren()) {
+                    copyFromWarToFolder(innerFileOrFolder, targetFolder);
+                }
+            } else {
+                // Different name: mirror the folder as a sub-folder of the target.
+                File innerTargetFolder = new File(targetFolder, virtualFileOrFolder.getName());
+                innerTargetFolder.mkdir();
+                for (VirtualFile innerFileOrFolder : virtualFileOrFolder.getChildren()) {
+                    copyFromWarToFolder(innerFileOrFolder, innerTargetFolder);
+                }
+            }
+        } else {
+            File targetFile = new File(targetFolder, virtualFileOrFolder.getName());
+            if (!targetFile.exists() || targetFile.length() != virtualFileOrFolder.getSize()) {
+                FileUtils.copyURLToFile(virtualFileOrFolder.asFileURL(), targetFile);
+            }
+        }
+    }
+}
--- a/Tess4J/src/net/sourceforge/tess4j/util/LoggHelper.java
+++ b/Tess4J/src/net/sourceforge/tess4j/util/LoggHelper.java
@@ -0,0 +1,34 @@
+/**
+ * Copyright @ 2015 Quan Nguyen
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package net.sourceforge.tess4j.util;
+
+/**
+ * Helper for logging.
+ *
+ * @author O.J. Sousa Rodrigues
+ */
+public class LoggHelper extends Exception {
+
+    // Yields the class name found in the top frame of this instance's stack trace.
+    @Override
+    public String toString() {
+        // the logger configuration is loaded as a side effect of every call
+        LoggerConfig.INSTANCE.loadConfig();
+
+        final StackTraceElement topFrame = getStackTrace()[0];
+        return topFrame.getClassName();
+    }
+}
--- a/Tess4J/src/net/sourceforge/tess4j/util/LoggerConfig.java
+++ b/Tess4J/src/net/sourceforge/tess4j/util/LoggerConfig.java
@@ -0,0 +1,51 @@
+/**
+ * Copyright @ 2015 Quan Nguyen
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package net.sourceforge.tess4j.util;
+
+import org.slf4j.bridge.SLF4JBridgeHandler;
+
+/**
+ * Logging configuration.
+ *
+ * @author O.J. Sousa Rodrigues
+ */
+public enum LoggerConfig {
+
+    INSTANCE;
+
+    private boolean isLoaded = false;
+
+    /**
+     * Installs the SLF4J bridge handler unless an earlier call already did so.
+     *
+     * @return true if the Logger configuration was loaded successfully.
+     */
+    public boolean loadConfig() {
+        if (this.isLoaded) {
+            return true;
+        }
+        try {
+            // replace the root logger's handlers with the SLF4J bridge
+            SLF4JBridgeHandler.removeHandlersForRootLogger();
+            SLF4JBridgeHandler.install();
+            this.isLoaded = true;
+        } catch (final Exception e) {
+            System.err.println("Logger configuration could not be loaded.");
+        }
+
+        return this.isLoaded;
+    }
+}
--- a/Tess4J/src/net/sourceforge/tess4j/util/PdfBoxUtilities.java
+++ b/Tess4J/src/net/sourceforge/tess4j/util/PdfBoxUtilities.java
@@ -0,0 +1,215 @@
+/**
+ * Copyright @ 2018 Quan Nguyen
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package net.sourceforge.tess4j.util;
+
+import java.awt.image.BufferedImage;
+import java.io.File;
+import java.io.FilenameFilter;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.List;
+
+import org.apache.pdfbox.io.MemoryUsageSetting;
+import org.apache.pdfbox.multipdf.PDFMergerUtility;
+import org.apache.pdfbox.multipdf.Splitter;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.rendering.ImageType;
+import org.apache.pdfbox.rendering.PDFRenderer;
+import org.apache.pdfbox.tools.imageio.ImageIOUtil;
+import org.slf4j.LoggerFactory;
+
+/**
+ * PDF utilities based on PDFBox.
+ *
+ * @author Robert Drysdale
+ * @author Quan Nguyen
+ */
+public class PdfBoxUtilities {
+
+    private static final org.slf4j.Logger logger = LoggerFactory.getLogger(new LoggHelper().toString());
+
+    /**
+     * Converts PDF to TIFF format; the intermediate PNG files are deleted afterwards.
+     *
+     * @param inputPdfFile input file
+     * @return a multi-page TIFF image (a temporary file)
+     * @throws IOException if creating the temporary files or merging them fails
+     */
+    public static File convertPdf2Tiff(File inputPdfFile) throws IOException {
+        File[] pngFiles = null;
+
+        try {
+            pngFiles = convertPdf2Png(inputPdfFile);
+            File tiffFile = File.createTempFile("multipage", ".tif");
+
+            // put PNG images into a single multi-page TIFF image for return
+            ImageIOHelper.mergeTiff(pngFiles, tiffFile);
+            return tiffFile;
+        } finally {
+            if (pngFiles != null && pngFiles.length > 0) {
+                // get the working directory of the PNG files
+                File pngDirectory = pngFiles[0].getParentFile();
+                // delete temporary PNG images
+                for (File tempFile : pngFiles) {
+                    tempFile.delete();
+                }
+
+                pngDirectory.delete();
+            }
+        }
+    }
+
+    /**
+     * Converts PDF to PNG format: 300 DPI RGB, one file per page, in a new temporary directory.
+     *
+     * @param inputPdfFile input file
+     * @return an array of PNG images sorted by file name; empty if nothing was rendered
+     * @throws java.io.IOException if the temporary directory cannot be created
+     */
+    public static File[] convertPdf2Png(File inputPdfFile) throws IOException {
+        Path path = Files.createTempDirectory("tessimages");
+        File imageDir = path.toFile();
+
+        PDDocument document = null;
+        try {
+            document = PDDocument.load(inputPdfFile);
+            PDFRenderer pdfRenderer = new PDFRenderer(document);
+            for (int page = 0; page < document.getNumberOfPages(); ++page) {
+                BufferedImage bim = pdfRenderer.renderImageWithDPI(page, 300, ImageType.RGB);
+                // suffix in filename will be used as the file format
+                String filename = String.format("workingimage%04d.png", page + 1);
+                ImageIOUtil.writeImage(bim, new File(imageDir, filename).getAbsolutePath(), 300);
+            }
+        } catch (IOException ioe) {
+            // pages rendered before the failure are still returned
+            logger.error("Error extracting PDF Document => " + ioe.getMessage(), ioe);
+        } finally {
+            closeQuietly(document);
+        }
+
+        // find working files
+        File[] workingFiles = imageDir.listFiles(new FilenameFilter() {
+            @Override
+            public boolean accept(File dir, String name) {
+                return name.toLowerCase().matches("workingimage\\d{4}\\.png$");
+            }
+        });
+
+        if (workingFiles == null || workingFiles.length == 0) {
+            // nothing rendered: drop the empty directory rather than sort a null array
+            imageDir.delete();
+            return new File[0];
+        }
+
+        Arrays.sort(workingFiles, new Comparator<File>() {
+            @Override
+            public int compare(File f1, File f2) {
+                return f1.getName().compareTo(f2.getName());
+            }
+        });
+
+        return workingFiles;
+    }
+
+    /**
+     * Splits PDF. I/O failures are logged, not thrown.
+     *
+     * @param inputPdfFile input file
+     * @param outputPdfFile output file
+     * @param firstPage begin page
+     * @param lastPage end page
+     */
+    public static void splitPdf(File inputPdfFile, File outputPdfFile, int firstPage, int lastPage) {
+        PDDocument document = null;
+        List<PDDocument> documents = null;
+        try {
+            document = PDDocument.load(inputPdfFile);
+            Splitter splitter = new Splitter();
+
+            splitter.setStartPage(firstPage);
+            splitter.setEndPage(lastPage);
+            splitter.setSplitAtPage(lastPage - firstPage + 1);
+
+            documents = splitter.split(document);
+            if (documents.size() == 1) {
+                documents.get(0).save(outputPdfFile);
+            } else {
+                logger.error("Splitter returned " + documents.size() + " documents rather than expected of 1");
+            }
+        } catch (IOException ioe) {
+            logger.error("Exception splitting PDF => " + ioe.getMessage(), ioe);
+        } finally {
+            // close every split result, even when saving failed or several came back
+            if (documents != null) {
+                for (PDDocument part : documents) {
+                    closeQuietly(part);
+                }
+            }
+            closeQuietly(document);
+        }
+    }
+
+    /**
+     * Gets PDF Page Count.
+     *
+     * @param inputPdfFile input file
+     * @return number of pages, or -1 if the document cannot be read
+     */
+    public static int getPdfPageCount(File inputPdfFile) {
+        PDDocument document = null;
+        try {
+            document = PDDocument.load(inputPdfFile);
+            return document.getNumberOfPages();
+        } catch (IOException ioe) {
+            logger.error("Error counting PDF pages => " + ioe.getMessage(), ioe);
+            return -1;
+        } finally {
+            closeQuietly(document);
+        }
+    }
+
+    /**
+     * Merges PDF files. I/O failures are logged, not thrown.
+     *
+     * @param inputPdfFiles array of input files
+     * @param outputPdfFile output file
+     */
+    public static void mergePdf(File[] inputPdfFiles, File outputPdfFile) {
+        try {
+            PDFMergerUtility mergerUtility = new PDFMergerUtility();
+            mergerUtility.setDestinationFileName(outputPdfFile.getPath());
+            for (File inputPdfFile : inputPdfFiles) {
+                mergerUtility.addSource(inputPdfFile);
+            }
+            mergerUtility.mergeDocuments(MemoryUsageSetting.setupMainMemoryOnly());
+        } catch (IOException ioe) {
+            logger.error("Error merging PDF files => " + ioe.getMessage(), ioe);
+        }
+    }
+
+    // Closes a document on a best-effort basis; a failing close is deliberately ignored.
+    private static void closeQuietly(PDDocument document) {
+        if (document != null) {
+            try {
+                document.close();
+            } catch (Exception ignored) {
+            }
+        }
+    }
+}
--- a/Tess4J/src/net/sourceforge/tess4j/util/PdfGsUtilities.java
+++ b/Tess4J/src/net/sourceforge/tess4j/util/PdfGsUtilities.java
@@ -0,0 +1,319 @@
+/**
+ * Copyright @ 2009 Quan Nguyen
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package net.sourceforge.tess4j.util;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FilenameFilter;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.List;
+
+import org.ghost4j.Ghostscript;
+import org.ghost4j.GhostscriptException;
+import org.slf4j.LoggerFactory;
+
+/**
+ * PDF utilities based on Ghostscript.
+ */
+public class PdfGsUtilities {
+
+    /**
+     * Hint appended to error messages when Ghostscript appears to be missing.
+     */
+    public static final String GS_INSTALL = "\nPlease download, install GPL Ghostscript from http://www.ghostscript.com\nand/or set the appropriate path variable.";
+
+    private static final org.slf4j.Logger logger = LoggerFactory.getLogger(new LoggHelper().toString());
+
+    /**
+     * Converts PDF to TIFF format. The pages are first rendered to temporary
+     * PNG files, which are deleted again before this method returns.
+     *
+     * @param inputPdfFile input file
+     * @return a multi-page TIFF image (a temporary file)
+     * @throws IOException if creating the temporary files or merging them fails
+     */
+    public static File convertPdf2Tiff(File inputPdfFile) throws IOException {
+        File[] pngFiles = null;
+
+        try {
+            pngFiles = convertPdf2Png(inputPdfFile);
+            File tiffFile = File.createTempFile("multipage", ".tif");
+
+            // put PNG images into a single multi-page TIFF image for return
+            ImageIOHelper.mergeTiff(pngFiles, tiffFile);
+            return tiffFile;
+        } finally {
+            if (pngFiles != null && pngFiles.length > 0) {
+                // get the working directory of the PNG files
+                File pngDirectory = pngFiles[0].getParentFile();
+                // delete temporary PNG images
+                for (File tempFile : pngFiles) {
+                    tempFile.delete();
+                }
+
+                pngDirectory.delete();
+            }
+        }
+    }
+
+    /**
+     * Converts PDF to PNG format. Ghostscript renders every page as a 300 DPI
+     * grayscale PNG into a newly created temporary directory.
+     *
+     * @param inputPdfFile input file
+     * @return an array of PNG images sorted by file name; empty if no page
+     * image was produced
+     * @throws java.io.IOException if the temporary directory cannot be created
+     * @throws RuntimeException if Ghostscript is unavailable or fails
+     */
+    public synchronized static File[] convertPdf2Png(File inputPdfFile) throws IOException {
+        Path path = Files.createTempDirectory("tessimages");
+        File imageDir = path.toFile();
+
+        //prepare Ghostscript interpreter parameters
+        //refer to Ghostscript documentation for parameter usage
+        List<String> gsArgs = new ArrayList<String>();
+        gsArgs.add("-gs");
+        // run unattended: no page prompts, no startup messages, quit when done
+        gsArgs.add("-dNOPAUSE");
+        gsArgs.add("-dQUIET");
+        gsArgs.add("-dBATCH");
+        // restrict the interpreter's file access
+        gsArgs.add("-dSAFER");
+        // grayscale PNG at 300 DPI with anti-aliased graphics and text
+        gsArgs.add("-sDEVICE=pnggray");
+        gsArgs.add("-r300");
+        gsArgs.add("-dGraphicsAlphaBits=4");
+        gsArgs.add("-dTextAlphaBits=4");
+        // Ghostscript replaces %04d with the zero-padded page number
+        gsArgs.add("-sOutputFile=" + imageDir.getPath() + "/workingimage%04d.png");
+        gsArgs.add(inputPdfFile.getPath());
+
+        try {
+            runGhostscript(gsArgs);
+        } finally {
+            // File.delete() only removes an empty directory, so this cleans up
+            // after a failed or empty run without touching rendered images
+            imageDir.delete();
+        }
+
+        // find working files
+        File[] workingFiles = imageDir.listFiles(new FilenameFilter() {
+
+            @Override
+            public boolean accept(File dir, String name) {
+                return name.toLowerCase().matches("workingimage\\d{4}\\.png$");
+            }
+        });
+
+        if (workingFiles == null) {
+            // the directory was empty and has just been removed
+            return new File[0];
+        }
+
+        Arrays.sort(workingFiles, new Comparator<File>() {
+            @Override
+            public int compare(File f1, File f2) {
+                return f1.getName().compareTo(f2.getName());
+            }
+        });
+
+        return workingFiles;
+    }
+
+    /**
+     * Splits PDF: the requested page range is written to a new file with the
+     * pdfwrite device.
+     *
+     * @param inputPdfFile input file
+     * @param outputPdfFile output file
+     * @param firstPage begin page; only passed to Ghostscript if greater than 0
+     * @param lastPage end page; only passed to Ghostscript if greater than 0
+     * @throws RuntimeException if Ghostscript is unavailable or fails
+     */
+    public static void splitPdf(File inputPdfFile, File outputPdfFile, int firstPage, int lastPage) {
+        //prepare Ghostscript interpreter parameters
+        //refer to Ghostscript documentation for parameter usage
+        //gs -sDEVICE=pdfwrite -dNOPAUSE -dQUIET -dBATCH -dFirstPage=m -dLastPage=n -sOutputFile=out.pdf in.pdf
+        List<String> gsArgs = new ArrayList<String>();
+        gsArgs.add("-gs");
+        gsArgs.add("-dNOPAUSE");
+        gsArgs.add("-dQUIET");
+        gsArgs.add("-dBATCH");
+        gsArgs.add("-sDEVICE=pdfwrite");
+        if (firstPage > 0) {
+            gsArgs.add("-dFirstPage=" + firstPage);
+        }
+        if (lastPage > 0) {
+            gsArgs.add("-dLastPage=" + lastPage);
+        }
+        gsArgs.add("-sOutputFile=" + outputPdfFile.getPath());
+        gsArgs.add(inputPdfFile.getPath());
+
+        runGhostscript(gsArgs);
+    }
+
+    /**
+     * Gets PDF Page Count by letting Ghostscript print it to a captured
+     * standard output.
+     *
+     * @param inputPdfFile input file
+     * @return number of pages, or 0 if the interpreter output cannot be
+     * evaluated
+     * @throws RuntimeException if Ghostscript is unavailable or fails
+     */
+    public static int getPdfPageCount(File inputPdfFile) {
+        //get Ghostscript instance
+        Ghostscript gs = Ghostscript.getInstance();
+
+        //prepare Ghostscript interpreter parameters
+        //refer to Ghostscript documentation for parameter usage
+        //gs -q -dNODISPLAY -c "(input.pdf) (r) file runpdfbegin pdfpagecount = quit"
+        List<String> gsArgs = new ArrayList<String>();
+        gsArgs.add("-gs");
+        gsArgs.add("-dNOPAUSE");
+        gsArgs.add("-dQUIET");
+        gsArgs.add("-dNODISPLAY");
+        gsArgs.add("-dBATCH");
+        gsArgs.add("-c");
+        // the path is embedded in a PostScript string literal: use forward slashes and
+        // escape parentheses so that they cannot terminate the literal early
+        String psPath = inputPdfFile.getPath().replace('\\', '/').replace("(", "\\(").replace(")", "\\)");
+        String cValue = String.format("(%s) (r) file runpdfbegin pdfpagecount = quit", psPath);
+        gsArgs.add(cValue);
+
+        int pageCount = 0;
+
+        //execute and exit interpreter
+        try {
+            synchronized (gs) {
+                //output
+                ByteArrayOutputStream os = new ByteArrayOutputStream();
+                gs.setStdOut(os);
+                // NOTE(review): unlike the other operations there is no gs.exit() here;
+                // presumably the trailing "quit" ends the interpreter - confirm
+                gs.initialize(gsArgs.toArray(new String[0]));
+                pageCount = Integer.parseInt(os.toString().trim());
+            }
+        } catch (UnsatisfiedLinkError | NoClassDefFoundError e) {
+            logger.error(e.getMessage(), e);
+            throw new RuntimeException(getMessage(e.getMessage()), e);
+        } catch (GhostscriptException e) {
+            logger.error(e.getMessage(), e);
+            throw new RuntimeException(e.getMessage(), e);
+        } catch (Exception e) {
+            // e.g. unparsable interpreter output: logged, and 0 pages are reported
+            logger.error(e.getMessage(), e);
+        } finally {
+            deleteInstanceQuietly();
+        }
+
+        return pageCount;
+    }
+
+    /**
+     * Merges PDF files in array order into a single output file.
+     *
+     * @param inputPdfFiles array of input files
+     * @param outputPdfFile output file
+     * @throws RuntimeException if Ghostscript is unavailable or fails
+     */
+    public static void mergePdf(File[] inputPdfFiles, File outputPdfFile) {
+        //prepare Ghostscript interpreter parameters
+        //refer to Ghostscript documentation for parameter usage
+        //gs -sDEVICE=pdfwrite -dNOPAUSE -dQUIET -dBATCH -sOutputFile=out.pdf in1.pdf in2.pdf in3.pdf
+        List<String> gsArgs = new ArrayList<String>();
+        gsArgs.add("-gs");
+        gsArgs.add("-dNOPAUSE");
+        gsArgs.add("-dQUIET");
+        gsArgs.add("-dBATCH");
+        gsArgs.add("-sDEVICE=pdfwrite");
+        gsArgs.add("-sOutputFile=" + outputPdfFile.getPath());
+
+        for (File inputPdfFile : inputPdfFiles) {
+            gsArgs.add(inputPdfFile.getPath());
+        }
+
+        runGhostscript(gsArgs);
+    }
+
+    /**
+     * Appends the Ghostscript installation hint to a message that mentions
+     * the gs library or ghost4j.
+     *
+     * @param message original error message; may be null
+     * @return the message, with the hint appended where applicable
+     */
+    static String getMessage(String message) {
+        // Throwable.getMessage() may return null
+        if (message != null && (message.contains("library 'gs") || message.contains("ghost4j"))) {
+            return message + GS_INSTALL;
+        }
+        return message;
+    }
+
+    /**
+     * Runs the Ghostscript interpreter once with the given arguments and
+     * deletes the interpreter instance afterwards. Shared by the conversion,
+     * split and merge operations.
+     * <p>
+     * NOTE(review): only convertPdf2Png is synchronized on the class; whether
+     * concurrent callers of the other operations are safe depends on ghost4j
+     * and is not established here.
+     *
+     * @param gsArgs interpreter arguments
+     * @throws RuntimeException if Ghostscript cannot be linked or loaded, or if
+     * the interpreter reports an error; the original failure is kept as cause
+     */
+    private static void runGhostscript(List<String> gsArgs) {
+        //get Ghostscript instance
+        Ghostscript gs = Ghostscript.getInstance();
+
+        //execute and exit interpreter
+        try {
+            synchronized (gs) {
+                gs.initialize(gsArgs.toArray(new String[0]));
+                gs.exit();
+            }
+        } catch (UnsatisfiedLinkError | NoClassDefFoundError e) {
+            logger.error(e.getMessage(), e);
+            throw new RuntimeException(getMessage(e.getMessage()), e);
+        } catch (GhostscriptException e) {
+            logger.error(e.getMessage(), e);
+            throw new RuntimeException(e.getMessage(), e);
+        } finally {
+            deleteInstanceQuietly();
+        }
+    }
+
+    /**
+     * Deletes the interpreter instance (the original code notes this as the
+     * safer choice after each run). A failure to delete it is ignored, as
+     * before.
+     */
+    private static void deleteInstanceQuietly() {
+        try {
+            Ghostscript.deleteInstance();
+        } catch (GhostscriptException ignored) {
+            //nothing
+        }
+    }
+}
--- a/Tess4J/src/net/sourceforge/tess4j/util/PdfUtilities.java
+++ b/Tess4J/src/net/sourceforge/tess4j/util/PdfUtilities.java
@@ -0,0 +1,163 @@
+/**
+ * Copyright @ 2009 Quan Nguyen
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package net.sourceforge.tess4j.util;
+
+import java.io.File;
+import java.io.IOException;
+
+/**
+ * PDF utilities based on Ghostscript or PDFBox with Ghostscript as default. If
+ * Ghostscript is not available on the system, then PDFBox is used. Call
+ * <code>System.setProperty(PDF_LIBRARY, PDFBOX);</code> to set PDFBox as
+ * default.
+ */
+public class PdfUtilities {
+
+    public static final String PDF_LIBRARY = "pdf.library";
+    public static final String PDFBOX = "pdfbox";
+
+    /**
+     * Converts PDF to TIFF format.
+     *
+     * @param inputPdfFile input file
+     * @return a multi-page TIFF image
+     * @throws IOException
+     */
+    public static File convertPdf2Tiff(File inputPdfFile) throws IOException {
+        if (PDFBOX.equals(System.getProperty(PDF_LIBRARY))) {
+            return PdfBoxUtilities.convertPdf2Tiff(inputPdfFile);
+        } else {
+            try {
+                return PdfGsUtilities.convertPdf2Tiff(inputPdfFile);
+            } catch (Exception e) {
+                System.setProperty(PDF_LIBRARY, PDFBOX);
+                return convertPdf2Tiff(inputPdfFile);
+            }
+        }
+    }
+
+    /**
+     * Converts PDF to PNG format.
+     *
+     * @param inputPdfFile input file
+     * @return an array of PNG images
+     * @throws java.io.IOException
+     */
+    public static File[] convertPdf2Png(File inputPdfFile) throws IOException {
+        if (PDFBOX.equals(System.getProperty(PDF_LIBRARY))) {
+            return PdfBoxUtilities.convertPdf2Png(inputPdfFile);
+        } else {
+            try {
+                return PdfGsUtilities.convertPdf2Png(inputPdfFile);
+            } catch (Exception e) {
+                System.setProperty(PDF_LIBRARY, PDFBOX);
+                return convertPdf2Png(inputPdfFile);
+            }
+        }
+    }
+
+    /**
+     * Splits PDF.
+     *
+     * @deprecated As of Release 3.0.
+     *
+     * @param inputPdfFile input file
+     * @param outputPdfFile output file
+     * @param firstPage begin page
+     * @param lastPage end page
+     */
+    public static void splitPdf(String inputPdfFile, String outputPdfFile, String firstPage, String lastPage) {
+        if (firstPage.trim().isEmpty()) {
+            firstPage = "0";
+        }
+        if (lastPage.trim().isEmpty()) {
+            lastPage = "0";
+        }
+
+        splitPdf(new File(inputPdfFile), new File(outputPdfFile), Integer.parseInt(firstPage), Integer.parseInt(lastPage));
+    }
+
+    /**
+     * Splits PDF.
+     *
+     * @param inputPdfFile input file
+     * @param outputPdfFile output file
+     * @param firstPage begin page
+     * @param lastPage end page
+     */
+    public static void splitPdf(File inputPdfFile, File outputPdfFile, int firstPage, int lastPage) {
+        if (PDFBOX.equals(System.getProperty(PDF_LIBRARY))) {
+            PdfBoxUtilities.splitPdf(inputPdfFile, outputPdfFile, firstPage, lastPage);
+        } else {
+            try {
+                PdfGsUtilities.splitPdf(inputPdfFile, outputPdfFile, firstPage, lastPage);
+            } catch (Exception e) {
+                System.setProperty(PDF_LIBRARY, PDFBOX);
+                splitPdf(inputPdfFile, outputPdfFile, firstPage, lastPage);
+            }
+        }
+    }
+
+    /**
+     * Gets PDF Page Count.
+     *
+     * @deprecated As of Release 3.0.
+     *
+     * @param inputPdfFile input file
+     * @return number of pages
+     */
+    public static int getPdfPageCount(String inputPdfFile) {
+        return getPdfPageCount(new File(inputPdfFile));
+    }
+
+    /**
+     * Gets PDF Page Count.
+     *
+     * @param inputPdfFile input file
+     * @return number of pages
+     */
+    public static int getPdfPageCount(File inputPdfFile) {
+        if (PDFBOX.equals(System.getProperty(PDF_LIBRARY))) {
+            return PdfBoxUtilities.getPdfPageCount(inputPdfFile);
+        } else {
+            try {
+                return PdfGsUtilities.getPdfPageCount(inputPdfFile);
+            } catch (Exception e) {
+                System.setProperty(PDF_LIBRARY, PDFBOX);
+                return getPdfPageCount(inputPdfFile);
+            }
+        }
+    }
+
+    /**
+     * Merges PDF files.
+     *
+     * @param inputPdfFiles array of input files
+     * @param outputPdfFile output file
+     */
+    public static void mergePdf(File[] inputPdfFiles, File outputPdfFile) {
+        if (PDFBOX.equals(System.getProperty(PDF_LIBRARY))) {
+            PdfBoxUtilities.mergePdf(inputPdfFiles, outputPdfFile);
+        } else {
+            try {
+                PdfGsUtilities.mergePdf(inputPdfFiles, outputPdfFile);
+            } catch (Exception e) {
+                System.setProperty(PDF_LIBRARY, PDFBOX);
+                mergePdf(inputPdfFiles, outputPdfFile);
+            }
+        }
+    }
+}
--- a/Tess4J/src/net/sourceforge/tess4j/util/Utils.java
+++ b/Tess4J/src/net/sourceforge/tess4j/util/Utils.java
@@ -0,0 +1,74 @@
+/*
+ * Copyright @ 2013 Quan Nguyen
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package net.sourceforge.tess4j.util;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.lang.reflect.Field;
+import java.lang.reflect.Modifier;
+
+public class Utils {
+
+    /**
+     * Writes byte array to file, creating missing parent directories and
+     * replacing the content of an existing file.
+     *
+     * @param data byte array
+     * @param outFile output file
+     * @throws IOException if the file cannot be opened or written
+     */
+    public static void writeFile(byte[] data, File outFile) throws IOException {
+        // create parent dirs when necessary
+        File parentDir = outFile.getParentFile();
+        if (parentDir != null) {
+            parentDir.mkdirs();
+        }
+
+        // try-with-resources closes the stream even when the write fails
+        try (FileOutputStream fos = new FileOutputStream(outFile)) {
+            fos.write(data);
+        }
+    }
+
+    /**
+     * Gets user-friendly name of the public static final constant defined in a
+     * class or an interface for display purpose. A constant whose value is
+     * null is skipped.
+     *
+     * @param value the constant value
+     * @param c type of class or interface
+     * @return name of a matching constant, or <code>String.valueOf(value)</code>
+     * if none matches or a constant cannot be read
+     */
+    public static String getConstantName(Object value, Class<?> c) {
+        for (Field f : c.getDeclaredFields()) {
+            int mod = f.getModifiers();
+            if (Modifier.isStatic(mod) && Modifier.isPublic(mod) && Modifier.isFinal(mod)) {
+                try {
+                    Object constant = f.get(null);
+                    // a null constant must not abort the search with a NullPointerException
+                    if (constant != null && constant.equals(value)) {
+                        return f.getName();
+                    }
+                } catch (IllegalAccessException e) {
+                    return String.valueOf(value);
+                }
+            }
+        }
+        return String.valueOf(value);
+    }
+}