diff --git a/NGCC/src/ocr_orm/ControleurOCR.java b/NGCC/src/ocr_orm/ControleurOCR.java deleted file mode 100644 index 3016f32..0000000 --- a/NGCC/src/ocr_orm/ControleurOCR.java +++ /dev/null @@ -1,46 +0,0 @@ -package ocr_orm; -import java.awt.image.BufferedImage; -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; - - -import org.apache.pdfbox.pdmodel.PDDocument; - -public class ControleurOCR { - - public HashMap getNumNote(String path){ - - PdfToImage pdfAnalyzer = new PdfToImage(); - File pdfFile; - PDDocument document = null; - //LISTE DES IMAGES - ArrayList images = new ArrayList<>(); // stockera les images (resultat) - //HASHMAP POUR LE CSV - HashMap listeNumNote = new HashMap(); - - // CONVERT PAGES TO IMAGES - try { - String pdfFilesDirectory = "C:\\Users\\ph807242\\eclipse-workspace\\PT\\pdf\\"; - // nom du fichier pdf à ouvrir (TODO: changer le chemin) - List files = pdfAnalyzer.listAllFiles(pdfFilesDirectory, ".pdf"); - for (String fname : files) { - pdfFile = new File(fname); - document = PDDocument.load(pdfFile); // charge le fichier pdf cree pour le traiter - images.addAll(pdfAnalyzer.convertPagesToBWJPG(document)); - // appelle la methode qui convertit les pages en images (jpg) noir et blanches - } - } catch (IOException e) { - System.out.println(e); - } - - //LISTE DES IMAGES COMPRENANT L'IMAGE DE LA NOTE ET DU NUM ETUDIANT - ListeImageNGCC liNGCC = new ListeImageNGCC(images); - - listeNumNote = liNGCC.doOCR(); - return listeNumNote; - - } -} diff --git a/NGCC/src/ocr_orm/ImageNGCC.java b/NGCC/src/ocr_orm/ImageNGCC.java deleted file mode 100644 index d1755e0..0000000 --- a/NGCC/src/ocr_orm/ImageNGCC.java +++ /dev/null @@ -1,37 +0,0 @@ -package ocr_orm; - -import java.awt.image.*; - -import javax.imageio.ImageIO; - -import org.apache.pdfbox.rendering.ImageType; - -import java.awt.Image; -@SuppressWarnings("unused") -public class ImageNGCC { - - BufferedImage imgcopie ; - BufferedImage imgNumEtu; - BufferedImage imgNote; - - public ImageNGCC(BufferedImage buf){ - imgcopie=buf; - imgNumEtu = imgcopie.getSubimage(0,0, 100, 50); - imgNote = imgcopie.getSubimage(50,50,100,50); - } - - public BufferedImage getImgNumEtu() { - return imgNumEtu; - } - - public BufferedImage getImgNote() { - return imgNote; - } - - - - - - -} -//https://docs.oracle.com/javase/tutorial/2d/images/drawimage.html \ No newline at end of file diff --git a/NGCC/src/ocr_orm/ListeImageNGCC.java b/NGCC/src/ocr_orm/ListeImageNGCC.java deleted file mode 100644 index 1db1505..0000000 --- a/NGCC/src/ocr_orm/ListeImageNGCC.java +++ /dev/null @@ -1,46 +0,0 @@ -package ocr_orm; - - -import java.awt.image.BufferedImage; -import java.util.ArrayList; -import java.util.HashMap; -; - - - -public class ListeImageNGCC { - - private ArrayList listeImage = new ArrayList(); - - - public ListeImageNGCC(ArrayList liste) { - - for(int i=0; i getListeImage(ArrayList liste) { - return listeImage; - } - - public int taille() { - return listeImage.size(); - } - - public HashMap doOCR(){ - HashMap maMap = new HashMap(); - OCR ocr = new OCR(); - for (int i=0;i images = new ArrayList<>(); // stockera les images (resultat) - //HASHMAP POUR LE CSV - HashMap listeNumNote = new HashMap(); - - // CONVERT PAGES TO IMAGES - try { - String pdfFilesDirectory = "C:\\Users\\ph807242\\eclipse-workspace\\PT\\pdf\\"; - // nom du fichier pdf à ouvrir (TODO: changer le chemin) - List files = pdfAnalyzer.listAllFiles(pdfFilesDirectory, ".pdf"); - for (String fname : files) { - pdfFile = new File(fname); - document = PDDocument.load(pdfFile); // charge le fichier pdf cree pour le traiter - images.addAll(pdfAnalyzer.convertPagesToBWJPG(document)); - // appelle la methode qui convertit les pages en images (jpg) noir et blanches - } - } catch (IOException e) { - System.out.println(e); - } - - //LISTE DES IMAGES COMPRENANT L'IMAGE DE LA NOTE ET DU NUM ETUDIANT - ListeImageNGCC liNGCC = new ListeImageNGCC(images); - - listeNumNote = liNGCC.doOCR(); - - - } -} diff --git a/NGCC/src/ocr_orm/OCR.java b/NGCC/src/ocr_orm/OCR.java deleted file mode 100644 index 8db7c89..0000000 --- a/NGCC/src/ocr_orm/OCR.java +++ /dev/null @@ -1,28 +0,0 @@ -package ocr_orm; -import java.awt.image.BufferedImage; - - -import net.sourceforge.tess4j.Tesseract; -import net.sourceforge.tess4j.TesseractException; - -public class OCR { - - - - public String getOCR(BufferedImage img) { - //FAIRE L'OCR - Tesseract tesseract = new Tesseract(); - String str=""; - try { - tesseract.setOcrEngineMode(2); - tesseract.setTessVariable("tessedit_char_whitelist", "A-Za-z1-9"); - str=tesseract.doOCR(img); - } catch (TesseractException e) { - - e.printStackTrace(); - } - - - return str; - } -} diff --git a/NGCC/src/ocr_orm/PdfToImage.java b/NGCC/src/ocr_orm/PdfToImage.java deleted file mode 100644 index 6c6f8e7..0000000 --- a/NGCC/src/ocr_orm/PdfToImage.java +++ /dev/null @@ -1,101 +0,0 @@ -package ocr_orm; -import java.awt.image.BufferedImage; -import java.io.File; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.List; -import java.util.stream.Collectors; -import java.util.stream.Stream; - -import javax.imageio.ImageIO; - -import org.apache.pdfbox.pdmodel.PDDocument; -import org.apache.pdfbox.pdmodel.PDPage; -import org.apache.pdfbox.rendering.ImageType; -import org.apache.pdfbox.rendering.PDFRenderer; - -public class PdfToImage { - - public List listAllFiles(String directory, String extension) { - // https://www.mkyong.com/java/java-how-to-list-all-files-in-a-directory/ - List files = new ArrayList(); - try (Stream walk = Files.walk(Paths.get(directory))) { - // voir simplification si necessaire - files = walk.map(x -> x.toString()).filter(f -> f.endsWith(extension)).collect(Collectors.toList()); - } catch (IOException ioe) { - ioe.printStackTrace(); - } - return files; - } - - public BufferedImage blackWhiteConvert(BufferedImage image) { - // Convertit une image en image en noir et blanc - // TODO : voir recursivite - int width = image.getWidth(); - int height = image.getHeight(); - for (int x = 0; x < width; x++) { - for (int y = 0; y < height; y++) { - if (image.getRGB(x, y) < 128) { - image.setRGB(x, y, 0); - } else { - image.setRGB(x, y, 255); - } - } - } - return image; - } - - public boolean isBlackWhite(BufferedImage image) { - // verifie si une image est en noir et blanc - // TODO : voir recursivite - int width = image.getWidth(); - int height = image.getHeight(); - for (int x = 0; x < width; x++) { - for (int y = 0; y < height; y++) { - if ((image.getRGB(x, y) != 0) || (image.getRGB(x, y) != 255)) { - return false; - } - } - } - return true; - } - - public ArrayList convertPagesToBWJPG(PDDocument document) { - // convertit chaque page d'un document pdf en image noir et blanc - // retourne une array liste d'images - ArrayList images = new ArrayList(); - PDFRenderer pdfRenderer = new PDFRenderer(document); - try { - int pageCounter = 0; - for (PDPage page : document.getPages()) { - System.out.println("page.getRotation() : " + page.getRotation()); - System.out.println("pageCounter : " + pageCounter); - BufferedImage bim = pdfRenderer.renderImageWithDPI(pageCounter++, 300, ImageType.BINARY); // BINARY = - // noir et - // blanc - images.add(bim); - System.out.println("Ajout n°" + pageCounter); - } - // document.close(); - } catch (IOException ioe) { - ioe.printStackTrace(); - } - return images; - } - - public void saveOnDisk(ArrayList images, String originalFileDir) { - // sauvegarde sur le disque les images - int pageCounter = 0; - try { - for (BufferedImage img : images) { - ImageIO.write(img, "JPEG", new File(originalFileDir + "img_" + pageCounter++ + ".jpg")); - } - } catch (IOException ioe) { - ioe.printStackTrace(); - } - } - -}