Press enter to see results or esc to cancel.


Java OCR Web Project – Tesseract Optical Character Recognition (OCR)

This tutorial shows a Java program that converts an image to text programmatically. First, install ‘tesseract-ocr-setup-3.02.02.exe’ to the location C:\Tesseract-OCR, which this example assumes.

  1. Project Structure in Eclipse
  2. Update the location where you installed the OCR, and the local host server information, in this class.
    Common_Things.java

    package common_things;
    /**
     * Central place for the settings shared across the project.
     *
     * <p>Both values are plain public static fields; edit them to match the
     * local installation.
     */
    public class Common_Things {

        /** Base URL of the web application (used to build the upload form action). */
        public static String url = "http://localhost:8080/OCR";

        /** Directory that uploaded images are written to. */
        public static String upload_path = "C:\\Tesseract-OCR";
    }
    
  3. This class runs the Tesseract command-line tool to generate the output file after OCR processing. The output file is generated in C:\Tesseract-OCR by default. You can change this location in the class if you want.
    create_file.java

    package OCR;
    import java.io.File;
    import java.io.IOException;
    import java.io.PrintWriter;
    /**
     * Runs the Tesseract command-line tool on an image so that the recognised
     * text is written to an output file next to it.
     *
     * <p>The tool is started from {@code C:\Tesseract-OCR}, which is also where
     * the image is expected to be and where the output file is produced.
     */
    public class create_file {

        /** Directory containing the Tesseract executable and the images to process. */
        private static final File TESSERACT_DIR = new File("C:\\Tesseract-OCR");

        /**
         * Runs {@code tesseract <file_name> <file_name>} inside the Tesseract
         * directory and waits for it to finish. The standard output and error
         * of the tool are relayed to {@code System.out} and {@code System.err}.
         *
         * <p>The method is best-effort: failures are reported on
         * {@code System.err} and never propagated to the caller.
         *
         * @param file_name name of the image inside the Tesseract directory; it
         *                  is also used as the output base name. When
         *                  {@code null} or blank, nothing is executed.
         */
        public void create_output_file(String file_name) {
            if (file_name == null || file_name.trim().isEmpty()) {
                System.err.println("create_output_file: no file name given, OCR skipped");
                return;
            }

            // The file name is passed as a discrete argument instead of being typed
            // into a command prompt, so characters such as '&' or '|' in the name
            // cannot be interpreted as additional shell commands.
            ProcessBuilder builder = new ProcessBuilder(
                    new File(TESSERACT_DIR, "tesseract.exe").getPath(),
                    file_name,
                    file_name);
            builder.directory(TESSERACT_DIR);

            try {
                Process p = builder.start();
                // The tool is given no input; closing stdin prevents it from waiting on it.
                p.getOutputStream().close();

                Thread errPump = new Thread(new SyncPipe(p.getErrorStream(), System.err));
                Thread outPump = new Thread(new SyncPipe(p.getInputStream(), System.out));
                errPump.start();
                outPump.start();

                int returnCode = p.waitFor();
                // Let the relays drain whatever the process wrote before it exited.
                errPump.join();
                outPump.join();

                if (returnCode != 0) {
                    System.err.println("tesseract exited with code " + returnCode
                            + " for file " + file_name);
                }
            } catch (InterruptedException e) {
                // Restore the interrupt flag so the calling thread can react to it.
                Thread.currentThread().interrupt();
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            } catch (RuntimeException e) {
                // Kept deliberately broad: callers rely on this method never throwing.
                e.printStackTrace();
            }
        }
    }
    
  4. This class supports running command-line (CMD) operations from the Java program by relaying the output of the launched process.
    SyncPipe.java

    package OCR;
    import java.io.InputStream;
    import java.io.OutputStream;
    /**
     * Copies everything that can be read from an input stream to an output
     * stream. Intended to be run on its own thread, e.g. to relay the output of
     * a child process.
     *
     * <p>Neither stream is closed by this class; the output stream is flushed
     * once copying ends so that buffered sinks do not hold back data.
     */
    class SyncPipe implements Runnable
    {
      private final OutputStream ostrm_;
      private final InputStream istrm_;

      /**
       * @param istrm stream to read from until end-of-stream
       * @param ostrm stream that receives every byte read
       */
      public SyncPipe(InputStream istrm, OutputStream ostrm) {
          istrm_ = istrm;
          ostrm_ = ostrm;
      }

      /**
       * Copies the input to the output in 1 KiB chunks until the input reports
       * end-of-stream. Any failure is printed to standard error and ends the copy.
       */
      public void run() {
          try
          {
              final byte[] buffer = new byte[1024];
              int length;
              while ((length = istrm_.read(buffer)) != -1)
              {
                  ostrm_.write(buffer, 0, length);
              }
          }
          catch (Exception e)
          {
              e.printStackTrace();
          }
          finally
          {
              // Push out whatever was written, even if the copy stopped early.
              // The sink is not closed here because it may be a shared stream
              // such as System.out or System.err.
              try
              {
                  ostrm_.flush();
              }
              catch (Exception e)
              {
                  e.printStackTrace();
              }
          }
      }
    }
    
  5. This class uploads the image to be processed to the location specified in Common_Things.java. For us, the location is C:\Tesseract-OCR.
    FileUploadHandler.java

    package Upload_Data;
    import java.io.File;
    import java.io.IOException;
    import java.io.PrintWriter;
    import java.sql.Connection;
    import java.sql.PreparedStatement;
    import java.util.Iterator;
    import java.util.List;
    import javax.servlet.http.HttpServlet;
    import javax.servlet.http.HttpServletRequest;
    import javax.servlet.http.HttpServletResponse;
    import org.apache.commons.fileupload.FileItem;
    import org.apache.commons.fileupload.FileItemFactory;
    import org.apache.commons.fileupload.disk.DiskFileItemFactory;
    import org.apache.commons.fileupload.servlet.ServletFileUpload;
    import common_things.Common_Things;
    /**
     * Servlet that accepts a multipart/form-data request and stores the uploaded
     * file in the directory configured by {@code Common_Things.upload_path}.
     *
     * <p>Besides the file part, the text form fields {@code file_name} and
     * {@code file_name_given} are read from the request when present.
     */
    public class FileUploadHandler extends HttpServlet {
        private static final long serialVersionUID = 1L;

        /**
         * Handles GET requests exactly like POST requests.
         *
         * @param request  the incoming request
         * @param response the response to write to
         * @throws IOException if the response writer cannot be obtained
         */
        public void doGet(HttpServletRequest request, HttpServletResponse response) throws IOException {
            doPost(request, response);
        }

        /**
         * Parses the multipart request, saves every non-empty file part into the
         * upload directory and collects the {@code file_name} and
         * {@code file_name_given} form fields. Requests that are not multipart are
         * ignored. Failures while parsing or saving are printed to standard error.
         *
         * @param request  the incoming request
         * @param response the response to write to
         * @throws IOException if the response writer cannot be obtained
         */
        public void doPost(HttpServletRequest request, HttpServletResponse response) throws IOException {
            String file_name = null;
            String file_name_given = null;
            response.setContentType("text/html");
            PrintWriter out = response.getWriter();
            if (!ServletFileUpload.isMultipartContent(request)) {
                return;
            }
            FileItemFactory factory = new DiskFileItemFactory();
            ServletFileUpload upload = new ServletFileUpload(factory);
            try {
                List<FileItem> fields = upload.parseRequest(request);
                for (FileItem fileItem : fields) {
                    if (fileItem.isFormField()) {
                        String fieldName = fileItem.getFieldName();
                        // Only the first occurrence of each field is kept.
                        if (file_name == null && "file_name".equals(fieldName)) {
                            file_name = fileItem.getString();
                        }
                        if (file_name_given == null && "file_name_given".equals(fieldName)) {
                            file_name_given = fileItem.getString();
                        }
                    } else if (fileItem.getSize() > 0) {
                        // The client controls the submitted name; keep only its last
                        // path segment so the file cannot be written outside the
                        // upload directory.
                        String safeName = baseName(fileItem.getName());
                        if (safeName.length() == 0) {
                            continue;
                        }
                        file_name = safeName;
                        fileItem.write(new File(Common_Things.upload_path, safeName));
                    }
                }
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                // NOTE(review): only an empty line is sent back. The collected
                // file_name / file_name_given values are not used afterwards;
                // presumably the original response (e.g. a redirect to read.jsp)
                // was lost - confirm against the downloadable project.
                out.println("");
                out.close();
            }
        }

        /**
         * Reduces a client-supplied file name to its final path segment.
         *
         * @param submittedName the name as sent by the browser, possibly including
         *                      a directory part; may be {@code null}
         * @return the part after the last {@code /}, {@code \} or {@code :}, or an
         *         empty string when nothing usable remains
         */
        private static String baseName(String submittedName) {
            if (submittedName == null) {
                return "";
            }
            int cut = Math.max(submittedName.lastIndexOf('/'), submittedName.lastIndexOf('\\'));
            cut = Math.max(cut, submittedName.lastIndexOf(':'));
            String name = submittedName.substring(cut + 1).trim();
            if (name.equals(".") || name.equals("..")) {
                return "";
            }
            return name;
        }
    }
    
  6. web.xml
    <?xml version="1.0" encoding="UTF-8"?>
    <web-app id="WebApp_ID" version="2.4" xmlns="http://java.sun.com/xml/ns/j2ee" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://java.sun.com/xml/ns/j2ee http://java.sun.com/xml/ns/j2ee/web-app_2_4.xsd">
    	<display-name>
    	OCR</display-name>
    	<welcome-file-list>
    	<welcome-file>index.html</welcome-file>
    		<welcome-file>index.htm</welcome-file>
    		<welcome-file>index.jsp</welcome-file>
    		<welcome-file>default.html</welcome-file>
    		<welcome-file>default.htm</welcome-file>
    		<welcome-file>default.jsp</welcome-file>
    	</welcome-file-list>
    	<servlet>
    		<servlet-name>FileUploadHandler</servlet-name>
    		<servlet-class>Upload_Data.FileUploadHandler</servlet-class>
    	</servlet>
    	<servlet-mapping>
    		<servlet-name>FileUploadHandler</servlet-name>
    		<url-pattern>/FileUploadHandler/*</url-pattern>
    	</servlet-mapping>
    </web-app>
    
  7. index.jsp
     	<%@ page language="java" contentType="text/html; charset=ISO-8859-1"
    pageEncoding="ISO-8859-1"%>
    <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
    <%@page import="common_things.Common_Things"%><html>
    <head>
    <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
    <!-- meta -->
    <meta charset="utf-8">
    </head>
    <body>
          <form action="<%=Common_Things.url %>/FileUploadHandler/" enctype="multipart/form-data" method="post">
                Select<input type="file" name="file2" /><br>
                File name <input type="text" name="file_name_given"> <br><br>
                <input type="submit" value="upload" />
          </form>
    </body>
    </html>
    
  8. read.jsp
     	<%@ page language="java" contentType="text/html; charset=ISO-8859-1"
    pageEncoding="ISO-8859-1"%>
    <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
    <%@page import="common_things.Common_Things"%>
    <%@page import="OCR.create_file"%>
    <%@page import="OCR.read_file"%><html>
    <head>
    <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
    <!-- meta -->
    <meta charset="utf-8">
    </head>
    <body>
         <%
         // Request parameters naming the uploaded file; getParameter returns null
         // when a parameter is absent.
         // NOTE(review): both values are echoed into the page further down without
         // escaping - confirm they are safe to render or escape them first.
         String file_name=(String)request.getParameter("file_name");
         String file_name_given=(String)request.getParameter("file_name_given");
         // Run the OCR step so that the output file for the uploaded image is created.
         create_file obj_create_file=new create_file();
         obj_create_file.create_output_file(file_name);
         // Read back the output file that the OCR step produced.
         // NOTE(review): the source of OCR.read_file is not visible here; presumably
         // it returns the recognised text for the given file name - confirm.
         read_file obj_read_file=new read_file();
         String read=obj_read_file.read_file(file_name);
         %>
         <%=file_name %><br>
         <%=file_name_given %><br>
         <br>
         <br>
         <hr>
         <%=read %>
         <hr>
         <br><br>
         <br><br><br>
         read success!!...
    </body>
    </html>
    
  9. testimage.png
  10. index.jsp after running the project
  11. The uploaded file will be present in C:\Tesseract-OCR.
    read.jsp after successful read operation.

  12. Download the project here,

    Download OCR.rar

Tags

Comments

Leave a Comment