/** * Copyright @ 2014 Quan Nguyen * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package net.sourceforge.tess4j; import java.awt.Rectangle; import java.awt.image.BufferedImage; import java.io.File; import java.nio.ByteBuffer; import java.util.List; import javax.imageio.IIOImage; /** * An interface represents common OCR methods. */ public interface ITesseract { String htmlBeginTag = "\n" + "\n
\nnull
indicates
* the whole image.
* @return the recognized text
* @throws TesseractException
*/
String doOCR(File imageFile, Rectangle rect) throws TesseractException;
/**
* Performs OCR operation.
*
* @param bi a buffered image
* @return the recognized text
* @throws TesseractException
*/
String doOCR(BufferedImage bi) throws TesseractException;
/**
* Performs OCR operation.
*
* @param bi a buffered image
* @param rect the bounding rectangle defines the region of the image to be
* recognized. A rectangle of zero dimension or null
indicates
* the whole image.
* @return the recognized text
* @throws TesseractException
*/
String doOCR(BufferedImage bi, Rectangle rect) throws TesseractException;
/**
* Performs OCR operation.
*
* @param imageList a list of IIOImage
objects
* @param rect the bounding rectangle defines the region of the image to be
* recognized. A rectangle of zero dimension or null
indicates
* the whole image.
* @return the recognized text
* @throws TesseractException
*/
String doOCR(ListIIOImage
objects
* @param filename input file name. Needed only for training and reading a
* UNLV zone file.
* @param rect the bounding rectangle defines the region of the image to be
* recognized. A rectangle of zero dimension or null
indicates
* the whole image.
* @return the recognized text
* @throws TesseractException
*/
String doOCR(ListSetImage
, (optionally)
* SetRectangle
, and one or more of the Get*Text
* functions.
*
* @param xsize width of image
* @param ysize height of image
* @param buf pixel data
* @param rect the bounding rectangle defines the region of the image to be
* recognized. A rectangle of zero dimension or null
indicates
* the whole image.
* @param bpp bits per pixel, represents the bit depth of the image, with 1
* for binary bitmap, 8 for gray, and 24 for color RGB.
* @return the recognized text
* @throws TesseractException
*/
String doOCR(int xsize, int ysize, ByteBuffer buf, Rectangle rect, int bpp) throws TesseractException;
/**
* Performs OCR operation. Use SetImage
, (optionally)
* SetRectangle
, and one or more of the Get*Text
* functions.
*
* @param xsize width of image
* @param ysize height of image
* @param buf pixel data
* @param filename input file name. Needed only for training and reading a
* UNLV zone file.
* @param rect the bounding rectangle defines the region of the image to be
* recognized. A rectangle of zero dimension or null
indicates
* the whole image.
* @param bpp bits per pixel, represents the bit depth of the image, with 1
* for binary bitmap, 8 for gray, and 24 for color RGB.
* @return the recognized text
* @throws TesseractException
*/
String doOCR(int xsize, int ysize, ByteBuffer buf, String filename, Rectangle rect, int bpp) throws TesseractException;
/**
* Sets tessdata path.
*
* @param datapath the tessdata path to set
*/
void setDatapath(String datapath);
/**
* Sets language for OCR.
*
* @param language the language code, which follows ISO 639-3 standard.
*/
void setLanguage(String language);
/**
* Sets OCR engine mode.
*
* @param ocrEngineMode the OcrEngineMode to set
*/
void setOcrEngineMode(int ocrEngineMode);
/**
* Sets page segmentation mode.
*
* @param mode the page segmentation mode to set
*/
void setPageSegMode(int mode);
/**
* Sets the value of Tesseract's internal parameter.
*
* @param key variable name, e.g., tessedit_create_hocr
,
* tessedit_char_whitelist
, etc.
* @param value value for corresponding variable, e.g., "1", "0",
* "0123456789", etc.
*/
void setTessVariable(String key, String value);
/**
* Sets configs to be passed to Tesseract's Init
method.
*
* @param configs list of config filenames, e.g., "digits", "bazaar",
* "quiet"
*/
void setConfigs(ListRectangle
* @throws TesseractException
*/
ListWord
*/
List