/**
 * A scanned page together with two fixed crops taken from it: the area named
 * {@code imgNumEtu} (student number) and the area named {@code imgNote} (mark).
 *
 * <p>Both crops are created with {@link BufferedImage#getSubimage}, so they share
 * their pixel data with the page passed to the constructor; modifying one is
 * visible through the other.
 *
 * <p>See https://docs.oracle.com/javase/tutorial/2d/images/drawimage.html
 */
public class ImageNGCC {

    // Crop rectangle of the student-number area, in pixels of the source page.
    private static final int NUM_ETU_X = 0;
    private static final int NUM_ETU_Y = 0;
    private static final int NUM_ETU_WIDTH = 100;
    private static final int NUM_ETU_HEIGHT = 50;

    // Crop rectangle of the mark area, in pixels of the source page.
    private static final int NOTE_X = 50;
    private static final int NOTE_Y = 50;
    private static final int NOTE_WIDTH = 100;
    private static final int NOTE_HEIGHT = 50;

    // Kept package-private (as before) so same-package code reading them still compiles.
    final BufferedImage imgcopie;
    final BufferedImage imgNumEtu;
    final BufferedImage imgNote;

    /**
     * Wraps a page and cuts the two regions out of it.
     *
     * @param buf the full page image; must be at least 150x100 pixels
     * @throws NullPointerException if {@code buf} is {@code null}
     * @throws RasterFormatException if {@code buf} is too small to contain a crop region
     */
    public ImageNGCC(BufferedImage buf) {
        if (buf == null) {
            throw new NullPointerException("buf must not be null");
        }
        imgcopie = buf;
        imgNumEtu = crop(buf, NUM_ETU_X, NUM_ETU_Y, NUM_ETU_WIDTH, NUM_ETU_HEIGHT, "student number");
        imgNote = crop(buf, NOTE_X, NOTE_Y, NOTE_WIDTH, NOTE_HEIGHT, "mark");
    }

    /** @return the crop of the student-number area */
    public BufferedImage getImgNumEtu() {
        return imgNumEtu;
    }

    /** @return the crop of the mark area */
    public BufferedImage getImgNote() {
        return imgNote;
    }

    /**
     * Cuts one rectangle out of the page, failing with an explicit message (same
     * exception type {@code getSubimage} itself would raise) when the page is too small.
     */
    private static BufferedImage crop(BufferedImage page, int x, int y, int width, int height, String what) {
        if (x + width > page.getWidth() || y + height > page.getHeight()) {
            throw new RasterFormatException("page is " + page.getWidth() + "x" + page.getHeight()
                    + " px but the " + what + " region needs at least "
                    + (x + width) + "x" + (y + height) + " px");
        }
        return page.getSubimage(x, y, width, height);
    }
}
-0,0 +1,46 @@ +package ocr_orm; + + +import java.awt.image.BufferedImage; +import java.util.ArrayList; +import java.util.HashMap; +; + + + +public class ListeImageNGCC { + + private ArrayList listeImage = new ArrayList(); + + + public ListeImageNGCC(ArrayList liste) { + + for(int i=0; i getListeImage(ArrayList liste) { + return listeImage; + } + + public int taille() { + return listeImage.size(); + } + + public HashMap doOCR(){ + HashMap maMap = new HashMap(); + OCR ocr = new OCR(); + for (int i=0;i images = new ArrayList<>(); // stockera les images (resultat) + //HASHMAP POUR LE CSV + HashMap listeNumNote = new HashMap(); + + // CONVERT PAGES TO IMAGES + try { + String pdfFilesDirectory = "C:\\Users\\ph807242\\eclipse-workspace\\PT\\pdf\\"; + // nom du fichier pdf à ouvrir (TODO: changer le chemin) + List files = pdfAnalyzer.listAllFiles(pdfFilesDirectory, ".pdf"); + for (String fname : files) { + pdfFile = new File(fname); + document = PDDocument.load(pdfFile); // charge le fichier pdf cree pour le traiter + images.addAll(pdfAnalyzer.convertPagesToBWJPG(document)); + // appelle la methode qui convertit les pages en images (jpg) noir et blanches + } + } catch (IOException e) { + System.out.println(e); + } + + //LISTE DES IMAGES COMPRENANT L'IMAGE DE LA NOTE ET DU NUM ETUDIANT + ListeImageNGCC liNGCC = new ListeImageNGCC(images); + + listeNumNote = liNGCC.doOCR(); + + + } +} diff --git a/NGCC/src/ocr_orm/OCR.java b/NGCC/src/ocr_orm/OCR.java new file mode 100644 index 0000000..6195ce6 --- /dev/null +++ b/NGCC/src/ocr_orm/OCR.java @@ -0,0 +1,18 @@ +package ocr_orm; +import java.awt.image.BufferedImage; +import java.io.File; + +import net.sourceforge.tess4j.Tesseract; +import net.sourceforge.tess4j.TesseractException; + +public class OCR { + + + + public String getOCR(BufferedImage img) { + //FAIRE L'OCR + String str=""; + + return str; + } +} diff --git a/NGCC/src/ocr_orm/PdfToImage.java b/NGCC/src/ocr_orm/PdfToImage.java new file mode 100644 index 0000000..6c6f8e7 
--- /dev/null +++ b/NGCC/src/ocr_orm/PdfToImage.java @@ -0,0 +1,101 @@ +package ocr_orm; +import java.awt.image.BufferedImage; +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import javax.imageio.ImageIO; + +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.rendering.ImageType; +import org.apache.pdfbox.rendering.PDFRenderer; + +public class PdfToImage { + + public List listAllFiles(String directory, String extension) { + // https://www.mkyong.com/java/java-how-to-list-all-files-in-a-directory/ + List files = new ArrayList(); + try (Stream walk = Files.walk(Paths.get(directory))) { + // voir simplification si necessaire + files = walk.map(x -> x.toString()).filter(f -> f.endsWith(extension)).collect(Collectors.toList()); + } catch (IOException ioe) { + ioe.printStackTrace(); + } + return files; + } + + public BufferedImage blackWhiteConvert(BufferedImage image) { + // Convertit une image en image en noir et blanc + // TODO : voir recursivite + int width = image.getWidth(); + int height = image.getHeight(); + for (int x = 0; x < width; x++) { + for (int y = 0; y < height; y++) { + if (image.getRGB(x, y) < 128) { + image.setRGB(x, y, 0); + } else { + image.setRGB(x, y, 255); + } + } + } + return image; + } + + public boolean isBlackWhite(BufferedImage image) { + // verifie si une image est en noir et blanc + // TODO : voir recursivite + int width = image.getWidth(); + int height = image.getHeight(); + for (int x = 0; x < width; x++) { + for (int y = 0; y < height; y++) { + if ((image.getRGB(x, y) != 0) || (image.getRGB(x, y) != 255)) { + return false; + } + } + } + return true; + } + + public ArrayList convertPagesToBWJPG(PDDocument document) { + // convertit chaque page d'un document pdf en 
image noir et blanc + // retourne une array liste d'images + ArrayList images = new ArrayList(); + PDFRenderer pdfRenderer = new PDFRenderer(document); + try { + int pageCounter = 0; + for (PDPage page : document.getPages()) { + System.out.println("page.getRotation() : " + page.getRotation()); + System.out.println("pageCounter : " + pageCounter); + BufferedImage bim = pdfRenderer.renderImageWithDPI(pageCounter++, 300, ImageType.BINARY); // BINARY = + // noir et + // blanc + images.add(bim); + System.out.println("Ajout n°" + pageCounter); + } + // document.close(); + } catch (IOException ioe) { + ioe.printStackTrace(); + } + return images; + } + + public void saveOnDisk(ArrayList images, String originalFileDir) { + // sauvegarde sur le disque les images + int pageCounter = 0; + try { + for (BufferedImage img : images) { + ImageIO.write(img, "JPEG", new File(originalFileDir + "img_" + pageCounter++ + ".jpg")); + } + } catch (IOException ioe) { + ioe.printStackTrace(); + } + } + +}