Java OCR Web Project – Tesseract Optical Character Recognition (OCR)
This tutorial shows a Java program that converts an image to text programmatically. For this example, first install ‘tesseract-ocr-setup-3.02.02.exe’ to the location C:\Tesseract-OCR.
- Project Structure in Eclipse
- In this class you can update the location where you installed Tesseract OCR and the localhost server URL.
Common_Things.javapackage common_things; public class Common_Things { public static String url="http://localhost:8080/OCR"; public static String upload_path="C:\\Tesseract-OCR"; }
- This class runs the command-prompt (CMD) commands that generate the output file after OCR processing. By default the output file is generated in C:\Tesseract-OCR; you can change that location in this class.
create_file.javapackage OCR; import java.io.PrintWriter; public class create_file { public void create_output_file(String file_name) { String[] command = { "cmd", }; Process p; try { p = Runtime.getRuntime().exec(command); new Thread(new SyncPipe(p.getErrorStream(), System.err)).start(); new Thread(new SyncPipe(p.getInputStream(), System.out)).start(); PrintWriter stdin = new PrintWriter(p.getOutputStream()); stdin.println("cd.."); stdin.println("cd.."); stdin.println("cd.."); stdin.println("cd.."); stdin.println("c:"); stdin.println("cd.."); stdin.println("cd.."); stdin.println("cd.."); stdin.println("cd Tesseract-OCR "); stdin.println("tesseract "+file_name+" "+file_name); stdin.close(); int returnCode = p.waitFor(); // System.out.println("Return code = " + returnCode); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } }
- This class supports running CMD operations from the Java program.
SyncPipe.javapackage OCR; import java.io.InputStream; import java.io.OutputStream; class SyncPipe implements Runnable { public SyncPipe(InputStream istrm, OutputStream ostrm) { istrm_ = istrm; ostrm_ = ostrm; } public void run() { try { final byte[] buffer = new byte[1024]; for (int length = 0; (length = istrm_.read(buffer)) != -1; ) { ostrm_.write(buffer, 0, length); } } catch (Exception e) { e.printStackTrace(); } } private final OutputStream ostrm_; private final InputStream istrm_; }
- This class uploads the image to be processed to the location specified in Common_Things.java. In this example, that location is C:\Tesseract-OCR.
FileUploadHandler.javapackage Upload_Data; import java.io.File; import java.io.IOException; import java.io.PrintWriter; import java.sql.Connection; import java.sql.PreparedStatement; import java.util.Iterator; import java.util.List; import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import org.apache.commons.fileupload.FileItem; import org.apache.commons.fileupload.FileItemFactory; import org.apache.commons.fileupload.disk.DiskFileItemFactory; import org.apache.commons.fileupload.servlet.ServletFileUpload; import common_things.Common_Things; public class FileUploadHandler extends HttpServlet { private static final long serialVersionUID = 1L; public void doGet(HttpServletRequest request, HttpServletResponse response) throws IOException { doPost(request, response); } public void doPost(HttpServletRequest request, HttpServletResponse response) throws IOException { Connection connection=null; PreparedStatement ps=null; String file_name=null; String file_name_given=null; response.setContentType("text/html"); PrintWriter out = response.getWriter(); boolean isMultipartContent = ServletFileUpload.isMultipartContent(request); if (!isMultipartContent) { return; } FileItemFactory factory = new DiskFileItemFactory(); ServletFileUpload upload = new ServletFileUpload(factory); try { List fields = upload.parseRequest(request); Iterator it = fields.iterator(); if (!it.hasNext()) { return; } while (it.hasNext()) { FileItem fileItem = it.next(); boolean isFormField = fileItem.isFormField(); if (isFormField) { if(file_name==null){ if(fileItem.getFieldName().equals("file_name")){ file_name=fileItem.getString(); } } if(file_name_given==null){ if(fileItem.getFieldName().equals("file_name_given")){ file_name_given=fileItem.getString(); } } } else { if(fileItem.getSize()>0){ file_name=fileItem.getName(); fileItem.write( new File(Common_Things.upload_path + File.separator + fileItem.getName())); } } } } catch 
(Exception e) { e.printStackTrace(); }finally{ if(connection!=null){ try { connection.close(); System.out.println("connection closed"); } catch (Exception e2) { e2.printStackTrace(); } } if(ps!=null){ try { ps.close(); } catch (Exception e2) { e2.printStackTrace(); } } out.println(""); out.close(); } } }
- web.xml
<?xml version="1.0" encoding="UTF-8"?>
<!-- Servlet 2.4 deployment descriptor for the OCR web application. -->
<web-app id="WebApp_ID" version="2.4"
         xmlns="http://java.sun.com/xml/ns/j2ee"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://java.sun.com/xml/ns/j2ee http://java.sun.com/xml/ns/j2ee/web-app_2_4.xsd">

    <display-name> OCR</display-name>

    <!-- Candidate welcome pages, in the order they are listed. -->
    <welcome-file-list>
        <welcome-file>index.html</welcome-file>
        <welcome-file>index.htm</welcome-file>
        <welcome-file>index.jsp</welcome-file>
        <welcome-file>default.html</welcome-file>
        <welcome-file>default.htm</welcome-file>
        <welcome-file>default.jsp</welcome-file>
    </welcome-file-list>

    <!-- Upload servlet: registered here and mapped to /FileUploadHandler/* below. -->
    <servlet>
        <servlet-name>FileUploadHandler</servlet-name>
        <servlet-class>Upload_Data.FileUploadHandler</servlet-class>
    </servlet>

    <servlet-mapping>
        <servlet-name>FileUploadHandler</servlet-name>
        <url-pattern>/FileUploadHandler/*</url-pattern>
    </servlet-mapping>

</web-app>
- index.jsp
<%@ page language="java" contentType="text/html; charset=ISO-8859-1" pageEncoding="ISO-8859-1"%>
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<%@page import="common_things.Common_Things"%>
<%-- Upload form: posts the chosen image and a user-supplied name to the FileUploadHandler servlet. --%>
<html>
<head>
    <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
    <!-- meta -->
    <meta charset="utf-8">
</head>
<body>
    <%-- The form target is built from the base URL configured in Common_Things. --%>
    <form action="<%=Common_Things.url %>/FileUploadHandler/" enctype="multipart/form-data" method="post">
        Select<input type="file" name="file2" /><br>
        File name <input type="text" name="file_name_given"> <br><br>
        <input type="submit" value="upload" />
    </form>
</body>
</html>
- read.jsp
<%@ page language="java" contentType="text/html; charset=ISO-8859-1" pageEncoding="ISO-8859-1"%>
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<%@page import="common_things.Common_Things"%>
<%@page import="OCR.create_file"%>
<%@page import="OCR.read_file"%>
<%-- Runs OCR on the file named by the "file_name" request parameter and shows the text that was read. --%>
<%!
    /**
     * Escapes the HTML metacharacters of a value before it is written into the
     * page, so request parameters cannot inject markup or script. A null value
     * is rendered as the text "null", matching what a plain JSP expression prints.
     */
    private static String escapeHtml(String text) {
        String value = String.valueOf(text);
        StringBuilder escaped = new StringBuilder(value.length() + 16);
        for (int i = 0; i < value.length(); i++) {
            char c = value.charAt(i);
            switch (c) {
                case '&':  escaped.append("&amp;");  break;
                case '<':  escaped.append("&lt;");   break;
                case '>':  escaped.append("&gt;");   break;
                case '"':  escaped.append("&quot;"); break;
                case '\'': escaped.append("&#39;");  break;
                default:   escaped.append(c);
            }
        }
        return escaped.toString();
    }
%>
<html>
<head>
    <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
    <!-- meta -->
    <meta charset="utf-8">
</head>
<body>
<%
    String file_name = request.getParameter("file_name");
    String file_name_given = request.getParameter("file_name_given");

    // Create the output file for the uploaded image.
    create_file obj_create_file = new create_file();
    obj_create_file.create_output_file(file_name);

    // Read the output file that was just created.
    read_file obj_read_file = new read_file();
    String read = obj_read_file.read_file(file_name);
%>
    <%-- Both values come straight from the request, so they are escaped before display. --%>
    <%=escapeHtml(file_name) %><br>
    <%=escapeHtml(file_name_given) %><br>
    <br> <br>
    <hr>
    <%-- NOTE(review): the OCR text is written unescaped because read_file is not shown here and may
         return markup; if it returns plain text, wrap it in escapeHtml(...) as well. --%>
    <%=read %>
    <hr>
    <br><br> <br><br><br>
    read success!!...
</body>
</html>
-
testimage.png
- index.jsp after running the project
-
The uploaded file will be present in C:\Tesseract-OCR.
read.jsp after successful read operation. -
Download the project here,
Download OCR.rar
0 Comments
Comments
Leave a Comment