From 228c16c7fe32089c3e06c2eb00902aea86f86a5b Mon Sep 17 00:00:00 2001 From: GrossPaul Date: Thu, 26 Sep 2019 16:24:36 +0200 Subject: [PATCH 1/5] adding new part ocr --- NGCC/src/ocr/Copie.java | 8 +++++++ NGCC/src/ocr/ImagesCopie.java | 23 +++++++++++++++++++ NGCC/src/ocr/Img.java | 37 ++++++++++++++++++++++++++++++ NGCC/src/ocr/ImgNote.java | 19 ++++++++++++++++ NGCC/src/ocr/ImgNumEtu.java | 18 +++++++++++++++ NGCC/src/ocr/Rogneur.java | 43 +++++++++++++++++++++++++++++++++++ 6 files changed, 148 insertions(+) create mode 100644 NGCC/src/ocr/Copie.java create mode 100644 NGCC/src/ocr/ImagesCopie.java create mode 100644 NGCC/src/ocr/Img.java create mode 100644 NGCC/src/ocr/ImgNote.java create mode 100644 NGCC/src/ocr/ImgNumEtu.java create mode 100644 NGCC/src/ocr/Rogneur.java diff --git a/NGCC/src/ocr/Copie.java b/NGCC/src/ocr/Copie.java new file mode 100644 index 0000000..56773a4 --- /dev/null +++ b/NGCC/src/ocr/Copie.java @@ -0,0 +1,8 @@ +package ocr; + +public class Copie { + + ImagesCopie base; + + +} diff --git a/NGCC/src/ocr/ImagesCopie.java b/NGCC/src/ocr/ImagesCopie.java new file mode 100644 index 0000000..261ef89 --- /dev/null +++ b/NGCC/src/ocr/ImagesCopie.java @@ -0,0 +1,23 @@ +package ocr; + +import java.awt.image.BufferedImage; +import java.util.Map; + +public abstract class ImagesCopie { + + private Map hMapImgs; + + + public ImagesCopie(BufferedImage imgOriginale) { + + hMapImgs = Rogneur.createHMapImgs(imgOriginale); + } + + public void applyOcrForEach() { + + for(String s : hMapImgs.keySet()) + { + hMapImgs.get(s).applyOcrImg(); + } + } +} diff --git a/NGCC/src/ocr/Img.java b/NGCC/src/ocr/Img.java new file mode 100644 index 0000000..3647c45 --- /dev/null +++ b/NGCC/src/ocr/Img.java @@ -0,0 +1,37 @@ +package ocr; + +import java.awt.image.BufferedImage; + +public abstract class Img { + + BufferedImage img; + String description; + + public Img(BufferedImage img) { + + this.img = img; + + } + + public abstract void applyOcrImg(); + + public BufferedImage getImg() { + return img; + } + + public void setImg(BufferedImage img) { + this.img = img; + } + + public String getDescription() { + return description; + } + + public void setDescription(String description) { + this.description = description; + } + + + + +} diff --git a/NGCC/src/ocr/ImgNote.java b/NGCC/src/ocr/ImgNote.java new file mode 100644 index 0000000..d923d09 --- /dev/null +++ b/NGCC/src/ocr/ImgNote.java @@ -0,0 +1,19 @@ +package ocr; + +import java.awt.image.BufferedImage; + +import ocr_orm.OCR; + +public class ImgNote extends Img{ + + public ImgNote(BufferedImage img) { + super(img); + // TODO Auto-generated constructor stub + } + + @Override + public void applyOcrImg() { + setDescription(OCR.applyOcrNumber(getImg()));; + } + +} diff --git a/NGCC/src/ocr/ImgNumEtu.java b/NGCC/src/ocr/ImgNumEtu.java new file mode 100644 index 0000000..04b63d2 --- /dev/null +++ b/NGCC/src/ocr/ImgNumEtu.java @@ -0,0 +1,18 @@ +package ocr; + +import java.awt.image.BufferedImage; + +import ocr_orm.OCR; + +public class ImgNumEtu extends Img{ + + public ImgNumEtu(BufferedImage img) { + super(img); + } + + @Override + public void applyOcrImg() { + setDescription(OCR.applyOcrNumber(getImg()));; + } + +} \ No newline at end of file diff --git a/NGCC/src/ocr/Rogneur.java b/NGCC/src/ocr/Rogneur.java new file mode 100644 index 0000000..5bacf55 --- /dev/null +++ b/NGCC/src/ocr/Rogneur.java @@ -0,0 +1,43 @@ +package ocr; + +import java.awt.image.BufferedImage; +import java.util.HashMap; +import java.util.Map; + +public class Rogneur { + + // Retourne une hashmap contenant une image et la description de son contenu + public static Map createHMapImgs(BufferedImage imgOriginale) { + + Map temp = new HashMap<>(); + temp.put("NumEtu", rogneurFormatNote(BufferedImage imgOriginale)); + temp.put("Note", rogneurFormatNote(BufferedImage imgOriginale)); + temp.put("FormatNote", rogneurFormatNote(BufferedImage imgOriginale)); + + return temp; + } + + // rogne la partie du numEtu + public Img rogneurNumEtu(BufferedImage imgOriginale) + { + + // A FAIRE + //return new ImgNum(); + } + + // rogne la partie de la note + public Img rogneurNote(BufferedImage imgOriginale) + { + + // A FAIRE + //return new ImgNum(); + } + + // rogne la partie du format de la note + public Img rogneurFormatNote(BufferedImage imgOriginale) + { + + // A FAIRE + //return new ImgNum(); + } +} From 5b062f11d7d32e2ea85f4e0be439a3c0bfadf134 Mon Sep 17 00:00:00 2001 From: Hugo Date: Fri, 27 Sep 2019 08:51:47 +0200 Subject: [PATCH 2/5] Ajout d'un ocr plus agile --- NGCC/src/ocr/ImagesCopie.java | 1 + NGCC/src/ocr/ImgNote.java | 4 ++-- NGCC/src/ocr/ImgNumEtu.java | 4 ++-- NGCC/src/ocr/OCR.java | 28 ++++++++++++++++++++++++++++ NGCC/src/ocr/Rogneur.java | 24 +++++++++--------------- NGCC/src/ocr/diag.ucls | 10 ++++++++++ 6 files changed, 52 insertions(+), 19 deletions(-) create mode 100644 NGCC/src/ocr/OCR.java create mode 100644 NGCC/src/ocr/diag.ucls diff --git a/NGCC/src/ocr/ImagesCopie.java b/NGCC/src/ocr/ImagesCopie.java index 261ef89..39bb319 100644 --- a/NGCC/src/ocr/ImagesCopie.java +++ b/NGCC/src/ocr/ImagesCopie.java @@ -10,6 +10,7 @@ public abstract class ImagesCopie { public ImagesCopie(BufferedImage imgOriginale) { + hMapImgs = Rogneur.createHMapImgs(imgOriginale); } diff --git a/NGCC/src/ocr/ImgNote.java b/NGCC/src/ocr/ImgNote.java index d923d09..bdc4db6 100644 --- a/NGCC/src/ocr/ImgNote.java +++ b/NGCC/src/ocr/ImgNote.java @@ -2,7 +2,7 @@ package ocr; import java.awt.image.BufferedImage; -import ocr_orm.OCR; + public class ImgNote extends Img{ @@ -13,7 +13,7 @@ public class ImgNote extends Img{ @Override public void applyOcrImg() { - setDescription(OCR.applyOcrNumber(getImg()));; + setDescription(OCR.applyOcrNumber(getImg())); } } diff --git a/NGCC/src/ocr/ImgNumEtu.java b/NGCC/src/ocr/ImgNumEtu.java index 04b63d2..c32f08a 100644 --- a/NGCC/src/ocr/ImgNumEtu.java +++ b/NGCC/src/ocr/ImgNumEtu.java @@ -2,7 +2,7 @@ package ocr; import java.awt.image.BufferedImage; -import ocr_orm.OCR; + public class ImgNumEtu extends Img{ @@ -12,7 +12,7 @@ public class ImgNumEtu extends Img{ @Override public void applyOcrImg() { - setDescription(OCR.applyOcrNumber(getImg()));; + setDescription(OCR.applyOcrNumber(getImg())); } } \ No newline at end of file diff --git a/NGCC/src/ocr/OCR.java b/NGCC/src/ocr/OCR.java new file mode 100644 index 0000000..96cf264 --- /dev/null +++ b/NGCC/src/ocr/OCR.java @@ -0,0 +1,28 @@ +package ocr; +import java.awt.image.BufferedImage; + + +import net.sourceforge.tess4j.Tesseract; +import net.sourceforge.tess4j.TesseractException; + +public class OCR { + + + + public static String applyOcrNumber(BufferedImage img) { + //FAIRE L'OCR + Tesseract tesseract = new Tesseract(); + String str=""; + try { + tesseract.setOcrEngineMode(2); + tesseract.setTessVariable("tessedit_char_whitelist","0-9"); + str=tesseract.doOCR(img); + } catch (TesseractException e) { + + e.printStackTrace(); + } + + + return str; + } +} diff --git a/NGCC/src/ocr/Rogneur.java b/NGCC/src/ocr/Rogneur.java index 5bacf55..3063b9a 100644 --- a/NGCC/src/ocr/Rogneur.java +++ b/NGCC/src/ocr/Rogneur.java @@ -10,34 +10,28 @@ public class Rogneur { public static Map createHMapImgs(BufferedImage imgOriginale) { Map temp = new HashMap<>(); - temp.put("NumEtu", rogneurFormatNote(BufferedImage imgOriginale)); - temp.put("Note", rogneurFormatNote(BufferedImage imgOriginale)); - temp.put("FormatNote", rogneurFormatNote(BufferedImage imgOriginale)); + temp.put("NumEtu", rogneurFormatNote(imgOriginale)); + temp.put("Note", rogneurFormatNote(imgOriginale)); + temp.put("FormatNote", rogneurFormatNote(imgOriginale)); return temp; } // rogne la partie du numEtu - public Img rogneurNumEtu(BufferedImage imgOriginale) + public static Img rogneurNumEtu(BufferedImage imgOriginale) { - - // A FAIRE - //return new ImgNum(); + return imgOriginale.getSubimage(x, y, w, h); } // rogne la partie de la note - public Img rogneurNote(BufferedImage imgOriginale) + public static Img rogneurNote(BufferedImage imgOriginale) { - - // A FAIRE - //return new ImgNum(); + return imgOriginale.getSubimage(x, y, w, h); } // rogne la partie du format de la note - public Img rogneurFormatNote(BufferedImage imgOriginale) + public static Img rogneurFormatNote(BufferedImage imgOriginale) { - - // A FAIRE - //return new ImgNum(); + return imgOriginale.getSubimage(x, y, w, h); } } diff --git a/NGCC/src/ocr/diag.ucls b/NGCC/src/ocr/diag.ucls new file mode 100644 index 0000000..720ffbe --- /dev/null +++ b/NGCC/src/ocr/diag.ucls @@ -0,0 +1,10 @@ + + + + + + + + \ No newline at end of file From 2bb467163d34b2d5002bd65b5b57db80a9e1b190 Mon Sep 17 00:00:00 2001 From: Hugo Date: Fri, 27 Sep 2019 09:00:19 +0200 Subject: [PATCH 3/5] Modif ocr --- NGCC/src/ocr/Copie.java | 6 +- NGCC/src/ocr/GestionnaireCopies.java | 5 ++ NGCC/src/ocr/ImagesCopie.java | 2 +- NGCC/src/ocr/diag.ucls | 89 +++++++++++++++++++++++++++- 4 files changed, 99 insertions(+), 3 deletions(-) create mode 100644 NGCC/src/ocr/GestionnaireCopies.java diff --git a/NGCC/src/ocr/Copie.java b/NGCC/src/ocr/Copie.java index 56773a4..b4ec6ae 100644 --- a/NGCC/src/ocr/Copie.java +++ b/NGCC/src/ocr/Copie.java @@ -1,8 +1,12 @@ package ocr; +import java.awt.image.BufferedImage; + public class Copie { ImagesCopie base; - + public Copie(BufferedImage img) { + this.base = new ImagesCopie(img); + } } diff --git a/NGCC/src/ocr/GestionnaireCopies.java b/NGCC/src/ocr/GestionnaireCopies.java new file mode 100644 index 0000000..c88977b --- /dev/null +++ b/NGCC/src/ocr/GestionnaireCopies.java @@ -0,0 +1,5 @@ +package ocr; + +public class GestionnaireCopies { + +} diff --git a/NGCC/src/ocr/ImagesCopie.java b/NGCC/src/ocr/ImagesCopie.java index 39bb319..1807497 100644 --- a/NGCC/src/ocr/ImagesCopie.java +++ b/NGCC/src/ocr/ImagesCopie.java @@ -3,7 +3,7 @@ package ocr; import java.awt.image.BufferedImage; import java.util.Map; -public abstract class ImagesCopie { +public class ImagesCopie { private Map hMapImgs; diff --git a/NGCC/src/ocr/diag.ucls b/NGCC/src/ocr/diag.ucls index 720ffbe..de3b52c 100644 --- a/NGCC/src/ocr/diag.ucls +++ b/NGCC/src/ocr/diag.ucls @@ -1,6 +1,93 @@ + associations="true" dependencies="false" nesting-relationships="true" router="FAN"> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + From a6e9730355145ab94d62aef40377bccc5443666d Mon Sep 17 00:00:00 2001 From: Hugo Date: Fri, 27 Sep 2019 09:42:51 +0200 Subject: [PATCH 4/5] finalisation de ocr ? --- NGCC/src/ocr/Copie.java | 10 +++ NGCC/src/ocr/GestionnaireCopies.java | 65 +++++++++++++++++ NGCC/src/ocr/ImagesCopie.java | 10 +++ NGCC/src/ocr/PdfToImage.java | 101 +++++++++++++++++++++++++++ NGCC/src/ocr/Rogneur.java | 20 ++++-- 5 files changed, 199 insertions(+), 7 deletions(-) create mode 100644 NGCC/src/ocr/PdfToImage.java diff --git a/NGCC/src/ocr/Copie.java b/NGCC/src/ocr/Copie.java index b4ec6ae..583bed6 100644 --- a/NGCC/src/ocr/Copie.java +++ b/NGCC/src/ocr/Copie.java @@ -9,4 +9,14 @@ public class Copie { public Copie(BufferedImage img) { this.base = new ImagesCopie(img); } + + public ImagesCopie getBase() { + return base; + } + + public void setBase(ImagesCopie base) { + this.base = base; + } + + } diff --git a/NGCC/src/ocr/GestionnaireCopies.java b/NGCC/src/ocr/GestionnaireCopies.java index c88977b..6971731 100644 --- a/NGCC/src/ocr/GestionnaireCopies.java +++ b/NGCC/src/ocr/GestionnaireCopies.java @@ -1,5 +1,70 @@ package ocr; +import java.awt.image.BufferedImage; +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.pdfbox.pdmodel.PDDocument; + + public class GestionnaireCopies { + private List listeCopie; + + private List copies; + + public GestionnaireCopies(String chemin) { + + copies = createImagesCopies(chemin); + listeCopie = new ArrayList(); + + for(BufferedImage i : copies) + { + listeCopie.add(new Copie(i)); + } + } + + public List createImagesCopies(String path){ + + PdfToImage pdfAnalyzer = new PdfToImage(); + File pdfFile; + PDDocument document = null; + //LISTE DES IMAGES + List images = new ArrayList<>(); // stockera les images (resultat) + // CONVERT PAGES TO IMAGES + try { + String pdfFilesDirectory = "C:\\Users\\ph807242\\eclipse-workspace\\PT\\pdf\\"; + // nom du fichier pdf à ouvrir (TODO: changer le chemin) + List files = pdfAnalyzer.listAllFiles(pdfFilesDirectory, ".pdf"); + for (String fname : files) { + pdfFile = new File(fname); + document = PDDocument.load(pdfFile); // charge le fichier pdf cree pour le traiter + images.addAll(pdfAnalyzer.convertPagesToBWJPG(document)); + // appelle la methode qui convertit les pages en images (jpg) noir et blanches + } + } catch (IOException e) { + System.out.println(e); + } + + //LISTE DES IMAGES COMPRENANT L'IMAGE DE LA NOTE ET DU NUM ETUDIANT + + return images; + + } + + public Map createHashMapforCSV(){ + + Map temp = new HashMap<>(); + for(Copie c : listeCopie) + { + temp.put(c.getBase().gethMapImgs().get("NumEtu").getDescription(), c.getBase().gethMapImgs().get("Note").getDescription()); + } + return temp; + + } + } diff --git a/NGCC/src/ocr/ImagesCopie.java b/NGCC/src/ocr/ImagesCopie.java index 1807497..1414cf6 100644 --- a/NGCC/src/ocr/ImagesCopie.java +++ b/NGCC/src/ocr/ImagesCopie.java @@ -21,4 +21,14 @@ public class ImagesCopie { hMapImgs.get(s).applyOcrImg(); } } + + public Map gethMapImgs() { + return hMapImgs; + } + + public void sethMapImgs(Map hMapImgs) { + this.hMapImgs = hMapImgs; + } + + } diff --git a/NGCC/src/ocr/PdfToImage.java b/NGCC/src/ocr/PdfToImage.java new file mode 100644 index 0000000..b80295b --- /dev/null +++ b/NGCC/src/ocr/PdfToImage.java @@ -0,0 +1,101 @@ +package ocr; +import java.awt.image.BufferedImage; +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import javax.imageio.ImageIO; + +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.rendering.ImageType; +import org.apache.pdfbox.rendering.PDFRenderer; + +public class PdfToImage { + + public List listAllFiles(String directory, String extension) { + // https://www.mkyong.com/java/java-how-to-list-all-files-in-a-directory/ + List files = new ArrayList(); + try (Stream walk = Files.walk(Paths.get(directory))) { + // voir simplification si necessaire + files = walk.map(x -> x.toString()).filter(f -> f.endsWith(extension)).collect(Collectors.toList()); + } catch (IOException ioe) { + ioe.printStackTrace(); + } + return files; + } + + public BufferedImage blackWhiteConvert(BufferedImage image) { + // Convertit une image en image en noir et blanc + // TODO : voir recursivite + int width = image.getWidth(); + int height = image.getHeight(); + for (int x = 0; x < width; x++) { + for (int y = 0; y < height; y++) { + if (image.getRGB(x, y) < 128) { + image.setRGB(x, y, 0); + } else { + image.setRGB(x, y, 255); + } + } + } + return image; + } + + public boolean isBlackWhite(BufferedImage image) { + // verifie si une image est en noir et blanc + // TODO : voir recursivite + int width = image.getWidth(); + int height = image.getHeight(); + for (int x = 0; x < width; x++) { + for (int y = 0; y < height; y++) { + if ((image.getRGB(x, y) != 0) || (image.getRGB(x, y) != 255)) { + return false; + } + } + } + return true; + } + + public ArrayList convertPagesToBWJPG(PDDocument document) { + // convertit chaque page d'un document pdf en image noir et blanc + // retourne une array liste d'images + ArrayList images = new ArrayList(); + PDFRenderer pdfRenderer = new PDFRenderer(document); + try { + int pageCounter = 0; + for (PDPage page : document.getPages()) { + System.out.println("page.getRotation() : " + page.getRotation()); + System.out.println("pageCounter : " + pageCounter); + BufferedImage bim = pdfRenderer.renderImageWithDPI(pageCounter++, 300, ImageType.BINARY); // BINARY = + // noir et + // blanc + images.add(bim); + System.out.println("Ajout n°" + pageCounter); + } + // document.close(); + } catch (IOException ioe) { + ioe.printStackTrace(); + } + return images; + } + + public void saveOnDisk(ArrayList images, String originalFileDir) { + // sauvegarde sur le disque les images + int pageCounter = 0; + try { + for (BufferedImage img : images) { + ImageIO.write(img, "JPEG", new File(originalFileDir + "img_" + pageCounter++ + ".jpg")); + } + } catch (IOException ioe) { + ioe.printStackTrace(); + } + } + +} diff --git a/NGCC/src/ocr/Rogneur.java b/NGCC/src/ocr/Rogneur.java index 3063b9a..1826bb2 100644 --- a/NGCC/src/ocr/Rogneur.java +++ b/NGCC/src/ocr/Rogneur.java @@ -10,9 +10,9 @@ public class Rogneur { public static Map createHMapImgs(BufferedImage imgOriginale) { Map temp = new HashMap<>(); - temp.put("NumEtu", rogneurFormatNote(imgOriginale)); - temp.put("Note", rogneurFormatNote(imgOriginale)); - temp.put("FormatNote", rogneurFormatNote(imgOriginale)); + temp.put("NumEtu", rogneurNumEtu(imgOriginale)); + temp.put("Note", rogneurNote(imgOriginale)); + //temp.put("FormatNote", rogneurFormatNote(imgOriginale)); return temp; } @@ -20,18 +20,24 @@ public class Rogneur { // rogne la partie du numEtu public static Img rogneurNumEtu(BufferedImage imgOriginale) { - return imgOriginale.getSubimage(x, y, w, h); + return (new ImgNumEtu(imgOriginale.getSubimage((imgOriginale.getWidth()/4)+4 + , imgOriginale.getHeight()-imgOriginale.getHeight()+115 + , (imgOriginale.getWidth()/4+150)-(imgOriginale.getWidth()/4+4) + , imgOriginale.getHeight()-imgOriginale.getHeight()+146-(imgOriginale.getHeight()-imgOriginale.getHeight()+115) ))); } // rogne la partie de la note public static Img rogneurNote(BufferedImage imgOriginale) { - return imgOriginale.getSubimage(x, y, w, h); + return (new ImgNumEtu(imgOriginale.getSubimage((imgOriginale.getWidth()/4)+4 + , imgOriginale.getHeight()-imgOriginale.getHeight()+160 + , (imgOriginale.getWidth()/4+150)-(imgOriginale.getWidth()/4+4) + , imgOriginale.getHeight()-imgOriginale.getHeight()+200-(imgOriginale.getHeight()-imgOriginale.getHeight()+160) ))); } // rogne la partie du format de la note - public static Img rogneurFormatNote(BufferedImage imgOriginale) + /*public static Img rogneurFormatNote(BufferedImage imgOriginale) { return imgOriginale.getSubimage(x, y, w, h); - } + }*/ } From ea240a03333a2673f56df32e79f01b1a49b7d19e Mon Sep 17 00:00:00 2001 From: Gross Date: Fri, 27 Sep 2019 10:00:48 +0200 Subject: [PATCH 5/5] Suppression ancien ocr --- NGCC/src/ocr_orm/ControleurOCR.java | 46 ------------ NGCC/src/ocr_orm/ImageNGCC.java | 37 ---------- NGCC/src/ocr_orm/ListeImageNGCC.java | 46 ------------ NGCC/src/ocr_orm/Main.java | 46 ------------ NGCC/src/ocr_orm/OCR.java | 28 -------- NGCC/src/ocr_orm/PdfToImage.java | 101 --------------------------- 6 files changed, 304 deletions(-) delete mode 100644 NGCC/src/ocr_orm/ControleurOCR.java delete mode 100644 NGCC/src/ocr_orm/ImageNGCC.java delete mode 100644 NGCC/src/ocr_orm/ListeImageNGCC.java delete mode 100644 NGCC/src/ocr_orm/Main.java delete mode 100644 NGCC/src/ocr_orm/OCR.java delete mode 100644 NGCC/src/ocr_orm/PdfToImage.java diff --git a/NGCC/src/ocr_orm/ControleurOCR.java b/NGCC/src/ocr_orm/ControleurOCR.java deleted file mode 100644 index 3016f32..0000000 --- a/NGCC/src/ocr_orm/ControleurOCR.java +++ /dev/null @@ -1,46 +0,0 @@ -package ocr_orm; -import java.awt.image.BufferedImage; -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; - - -import org.apache.pdfbox.pdmodel.PDDocument; - -public class ControleurOCR { - - public HashMap getNumNote(String path){ - - PdfToImage pdfAnalyzer = new PdfToImage(); - File pdfFile; - PDDocument document = null; - //LISTE DES IMAGES - ArrayList images = new ArrayList<>(); // stockera les images (resultat) - //HASHMAP POUR LE CSV - HashMap listeNumNote = new HashMap(); - - // CONVERT PAGES TO IMAGES - try { - String pdfFilesDirectory = "C:\\Users\\ph807242\\eclipse-workspace\\PT\\pdf\\"; - // nom du fichier pdf à ouvrir (TODO: changer le chemin) - List files = pdfAnalyzer.listAllFiles(pdfFilesDirectory, ".pdf"); - for (String fname : files) { - pdfFile = new File(fname); - document = PDDocument.load(pdfFile); // charge le fichier pdf cree pour le traiter - images.addAll(pdfAnalyzer.convertPagesToBWJPG(document)); - // appelle la methode qui convertit les pages en images (jpg) noir et blanches - } - } catch (IOException e) { - System.out.println(e); - } - - //LISTE DES IMAGES COMPRENANT L'IMAGE DE LA NOTE ET DU NUM ETUDIANT - ListeImageNGCC liNGCC = new ListeImageNGCC(images); - - listeNumNote = liNGCC.doOCR(); - return listeNumNote; - - } -} diff --git a/NGCC/src/ocr_orm/ImageNGCC.java b/NGCC/src/ocr_orm/ImageNGCC.java deleted file mode 100644 index d1755e0..0000000 --- a/NGCC/src/ocr_orm/ImageNGCC.java +++ /dev/null @@ -1,37 +0,0 @@ -package ocr_orm; - -import java.awt.image.*; - -import javax.imageio.ImageIO; - -import org.apache.pdfbox.rendering.ImageType; - -import java.awt.Image; -@SuppressWarnings("unused") -public class ImageNGCC { - - BufferedImage imgcopie ; - BufferedImage imgNumEtu; - BufferedImage imgNote; - - public ImageNGCC(BufferedImage buf){ - imgcopie=buf; - imgNumEtu = imgcopie.getSubimage(0,0, 100, 50); - imgNote = imgcopie.getSubimage(50,50,100,50); - } - - public BufferedImage getImgNumEtu() { - return imgNumEtu; - } - - public BufferedImage getImgNote() { - return imgNote; - } - - - - - - -} -//https://docs.oracle.com/javase/tutorial/2d/images/drawimage.html \ No newline at end of file diff --git a/NGCC/src/ocr_orm/ListeImageNGCC.java b/NGCC/src/ocr_orm/ListeImageNGCC.java deleted file mode 100644 index 1db1505..0000000 --- a/NGCC/src/ocr_orm/ListeImageNGCC.java +++ /dev/null @@ -1,46 +0,0 @@ -package ocr_orm; - - -import java.awt.image.BufferedImage; -import java.util.ArrayList; -import java.util.HashMap; -; - - - -public class ListeImageNGCC { - - private ArrayList listeImage = new ArrayList(); - - - public ListeImageNGCC(ArrayList liste) { - - for(int i=0; i getListeImage(ArrayList liste) { - return listeImage; - } - - public int taille() { - return listeImage.size(); - } - - public HashMap doOCR(){ - HashMap maMap = new HashMap(); - OCR ocr = new OCR(); - for (int i=0;i images = new ArrayList<>(); // stockera les images (resultat) - //HASHMAP POUR LE CSV - HashMap listeNumNote = new HashMap(); - - // CONVERT PAGES TO IMAGES - try { - String pdfFilesDirectory = "C:\\Users\\ph807242\\eclipse-workspace\\PT\\pdf\\"; - // nom du fichier pdf à ouvrir (TODO: changer le chemin) - List files = pdfAnalyzer.listAllFiles(pdfFilesDirectory, ".pdf"); - for (String fname : files) { - pdfFile = new File(fname); - document = PDDocument.load(pdfFile); // charge le fichier pdf cree pour le traiter - images.addAll(pdfAnalyzer.convertPagesToBWJPG(document)); - // appelle la methode qui convertit les pages en images (jpg) noir et blanches - } - } catch (IOException e) { - System.out.println(e); - } - - //LISTE DES IMAGES COMPRENANT L'IMAGE DE LA NOTE ET DU NUM ETUDIANT - ListeImageNGCC liNGCC = new ListeImageNGCC(images); - - listeNumNote = liNGCC.doOCR(); - - - } -} diff --git a/NGCC/src/ocr_orm/OCR.java b/NGCC/src/ocr_orm/OCR.java deleted file mode 100644 index 8db7c89..0000000 --- a/NGCC/src/ocr_orm/OCR.java +++ /dev/null @@ -1,28 +0,0 @@ -package ocr_orm; -import java.awt.image.BufferedImage; - - -import net.sourceforge.tess4j.Tesseract; -import net.sourceforge.tess4j.TesseractException; - -public class OCR { - - - - public String getOCR(BufferedImage img) { - //FAIRE L'OCR - Tesseract tesseract = new Tesseract(); - String str=""; - try { - tesseract.setOcrEngineMode(2); - tesseract.setTessVariable("tessedit_char_whitelist", "A-Za-z1-9"); - str=tesseract.doOCR(img); - } catch (TesseractException e) { - - e.printStackTrace(); - } - - - return str; - } -} diff --git a/NGCC/src/ocr_orm/PdfToImage.java b/NGCC/src/ocr_orm/PdfToImage.java deleted file mode 100644 index 6c6f8e7..0000000 --- a/NGCC/src/ocr_orm/PdfToImage.java +++ /dev/null @@ -1,101 +0,0 @@ -package ocr_orm; -import java.awt.image.BufferedImage; -import java.io.File; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.List; -import java.util.stream.Collectors; -import java.util.stream.Stream; - -import javax.imageio.ImageIO; - -import org.apache.pdfbox.pdmodel.PDDocument; -import org.apache.pdfbox.pdmodel.PDPage; -import org.apache.pdfbox.rendering.ImageType; -import org.apache.pdfbox.rendering.PDFRenderer; - -public class PdfToImage { - - public List listAllFiles(String directory, String extension) { - // https://www.mkyong.com/java/java-how-to-list-all-files-in-a-directory/ - List files = new ArrayList(); - try (Stream walk = Files.walk(Paths.get(directory))) { - // voir simplification si necessaire - files = walk.map(x -> x.toString()).filter(f -> f.endsWith(extension)).collect(Collectors.toList()); - } catch (IOException ioe) { - ioe.printStackTrace(); - } - return files; - } - - public BufferedImage blackWhiteConvert(BufferedImage image) { - // Convertit une image en image en noir et blanc - // TODO : voir recursivite - int width = image.getWidth(); - int height = image.getHeight(); - for (int x = 0; x < width; x++) { - for (int y = 0; y < height; y++) { - if (image.getRGB(x, y) < 128) { - image.setRGB(x, y, 0); - } else { - image.setRGB(x, y, 255); - } - } - } - return image; - } - - public boolean isBlackWhite(BufferedImage image) { - // verifie si une image est en noir et blanc - // TODO : voir recursivite - int width = image.getWidth(); - int height = image.getHeight(); - for (int x = 0; x < width; x++) { - for (int y = 0; y < height; y++) { - if ((image.getRGB(x, y) != 0) || (image.getRGB(x, y) != 255)) { - return false; - } - } - } - return true; - } - - public ArrayList convertPagesToBWJPG(PDDocument document) { - // convertit chaque page d'un document pdf en image noir et blanc - // retourne une array liste d'images - ArrayList images = new ArrayList(); - PDFRenderer pdfRenderer = new PDFRenderer(document); - try { - int pageCounter = 0; - for (PDPage page : document.getPages()) { - System.out.println("page.getRotation() : " + page.getRotation()); - System.out.println("pageCounter : " + pageCounter); - BufferedImage bim = pdfRenderer.renderImageWithDPI(pageCounter++, 300, ImageType.BINARY); // BINARY = - // noir et - // blanc - images.add(bim); - System.out.println("Ajout n°" + pageCounter); - } - // document.close(); - } catch (IOException ioe) { - ioe.printStackTrace(); - } - return images; - } - - public void saveOnDisk(ArrayList images, String originalFileDir) { - // sauvegarde sur le disque les images - int pageCounter = 0; - try { - for (BufferedImage img : images) { - ImageIO.write(img, "JPEG", new File(originalFileDir + "img_" + pageCounter++ + ".jpg")); - } - } catch (IOException ioe) { - ioe.printStackTrace(); - } - } - -}