From 3eb6ddff903fc38526230325e85043bc4ce0826e Mon Sep 17 00:00:00 2001
From: Hugo
Date: Thu, 26 Sep 2019 12:45:28 +0200
Subject: [PATCH] Modif des classes, ajout OCR, Final?

---
 NGCC/src/ocr_orm/ControleurOCR.java | 46 +++++++++++++++++++++++++++++
 NGCC/src/ocr_orm/OCR.java           | 12 +++++++-
 2 files changed, 57 insertions(+), 1 deletion(-)
 create mode 100644 NGCC/src/ocr_orm/ControleurOCR.java

diff --git a/NGCC/src/ocr_orm/ControleurOCR.java b/NGCC/src/ocr_orm/ControleurOCR.java
new file mode 100644
index 0000000..3016f32
--- /dev/null
+++ b/NGCC/src/ocr_orm/ControleurOCR.java
@@ -0,0 +1,46 @@
+package ocr_orm;
+import java.awt.image.BufferedImage;
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.pdfbox.pdmodel.PDDocument;
+
+/** Collects the page images of every PDF in a directory and hands them to the OCR step. */
+public class ControleurOCR {
+
+    /** Directory scanned when the caller passes no path; this is the location the code used to hard-code. */
+    private static final String DEFAULT_PDF_DIRECTORY = "C:\\Users\\ph807242\\eclipse-workspace\\PT\\pdf\\";
+
+    /**
+     * Converts each page of the ".pdf" files listed under {@code path} to an image, then runs OCR on them.
+     *
+     * @param path directory to scan; {@code null} or empty falls back to {@code DEFAULT_PDF_DIRECTORY}
+     * @return the map produced by {@code ListeImageNGCC.doOCR()} (the original notes say it feeds the CSV)
+     */
+    public HashMap getNumNote(String path){
+        PdfToImage pdfAnalyzer = new PdfToImage();
+        ArrayList images = new ArrayList<>(); // page images of all PDFs, in listing order
+        String pdfFilesDirectory = (path == null || path.isEmpty()) ? DEFAULT_PDF_DIRECTORY : path;
+
+        try {
+            List<String> files = pdfAnalyzer.listAllFiles(pdfFilesDirectory, ".pdf");
+            for (String fname : files) {
+                // Close each document once its pages are converted (it used to stay open).
+                // NOTE(review): assumes convertPagesToBWJPG returns fully rendered images - confirm.
+                try (PDDocument document = PDDocument.load(new File(fname))) {
+                    images.addAll(pdfAnalyzer.convertPagesToBWJPG(document));
+                }
+            }
+        } catch (IOException e) {
+            // Best effort, unchanged: report the failure and OCR whatever was converted so far.
+            System.out.println(e);
+        }
+
+        // Per the original note, this wrapper holds the grade image and the student-number image.
+        ListeImageNGCC liNGCC = new ListeImageNGCC(images);
+        return liNGCC.doOCR();
+    }
+}
diff --git a/NGCC/src/ocr_orm/OCR.java 
b/NGCC/src/ocr_orm/OCR.java index 6195ce6..8db7c89 100644 --- a/NGCC/src/ocr_orm/OCR.java +++ b/NGCC/src/ocr_orm/OCR.java @@ -1,6 +1,6 @@ package ocr_orm; import java.awt.image.BufferedImage; -import java.io.File; + import net.sourceforge.tess4j.Tesseract; import net.sourceforge.tess4j.TesseractException; @@ -11,7 +11,17 @@ public class OCR { public String getOCR(BufferedImage img) { //FAIRE L'OCR + Tesseract tesseract = new Tesseract(); String str=""; + try { + tesseract.setOcrEngineMode(2); + tesseract.setTessVariable("tessedit_char_whitelist", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"); /* the whitelist is a literal set of characters, not a range pattern, so each one is spelled out; 0 is included because grades and student numbers need it */ + str=tesseract.doOCR(img); + } catch (TesseractException e) { + + e.printStackTrace(); + } + return str; }