Modif des classes, ajout OCR, Final?
This commit is contained in:
parent
dd4b4d2471
commit
3eb6ddff90
46
NGCC/src/ocr_orm/ControleurOCR.java
Normal file
46
NGCC/src/ocr_orm/ControleurOCR.java
Normal file
@ -0,0 +1,46 @@
|
||||
package ocr_orm;
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
|
||||
public class ControleurOCR {
|
||||
|
||||
public HashMap<String,String> getNumNote(String path){
|
||||
|
||||
PdfToImage pdfAnalyzer = new PdfToImage();
|
||||
File pdfFile;
|
||||
PDDocument document = null;
|
||||
//LISTE DES IMAGES
|
||||
ArrayList<BufferedImage> images = new ArrayList<>(); // stockera les images (resultat)
|
||||
//HASHMAP POUR LE CSV
|
||||
HashMap<String,String> listeNumNote = new HashMap<String, String>();
|
||||
|
||||
// CONVERT PAGES TO IMAGES
|
||||
try {
|
||||
String pdfFilesDirectory = "C:\\Users\\ph807242\\eclipse-workspace\\PT\\pdf\\";
|
||||
// nom du fichier pdf à ouvrir (TODO: changer le chemin)
|
||||
List<String> files = pdfAnalyzer.listAllFiles(pdfFilesDirectory, ".pdf");
|
||||
for (String fname : files) {
|
||||
pdfFile = new File(fname);
|
||||
document = PDDocument.load(pdfFile); // charge le fichier pdf cree pour le traiter
|
||||
images.addAll(pdfAnalyzer.convertPagesToBWJPG(document));
|
||||
// appelle la methode qui convertit les pages en images (jpg) noir et blanches
|
||||
}
|
||||
} catch (IOException e) {
|
||||
System.out.println(e);
|
||||
}
|
||||
|
||||
//LISTE DES IMAGES COMPRENANT L'IMAGE DE LA NOTE ET DU NUM ETUDIANT
|
||||
ListeImageNGCC liNGCC = new ListeImageNGCC(images);
|
||||
|
||||
listeNumNote = liNGCC.doOCR();
|
||||
return listeNumNote;
|
||||
|
||||
}
|
||||
}
|
@ -1,6 +1,6 @@
|
||||
package ocr_orm;
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.io.File;
|
||||
|
||||
|
||||
import net.sourceforge.tess4j.Tesseract;
|
||||
import net.sourceforge.tess4j.TesseractException;
|
||||
@ -11,7 +11,17 @@ public class OCR {
|
||||
|
||||
public String getOCR(BufferedImage img) {
|
||||
//FAIRE L'OCR
|
||||
Tesseract tesseract = new Tesseract();
|
||||
String str="";
|
||||
try {
|
||||
tesseract.setOcrEngineMode(2);
|
||||
tesseract.setTessVariable("tessedit_char_whitelist", "A-Za-z1-9");
|
||||
str=tesseract.doOCR(img);
|
||||
} catch (TesseractException e) {
|
||||
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
|
||||
return str;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user