Monday, April 2, 2012

A simple crawler program that saves Google search results to a local database


Class conn


import java.sql.*;
import java.util.ArrayList;
import java.util.Properties;

/**
 * Small helper around an embedded Apache Derby database named "crawler".
 *
 * <p>{@link #go} opens the database, makes sure the {@code links} table
 * exists, inserts one row built from its arguments, reads it back as a sanity
 * check, drops the table again, commits and shuts the embedded engine down.
 * All SQL errors are reported on {@code System.err}; none are propagated.
 */
class conn
{
    /* the default framework is embedded*/
    private String framework = "embedded";
    private String driver = "org.apache.derby.jdbc.EmbeddedDriver";
    private String protocol = "jdbc:derby:";

    /** SQLState Derby raises when CREATE TABLE names an object that already exists. */
    private static final String OBJECT_ALREADY_EXISTS = "X0Y32";

    /**
     * Stores one search result in the {@code links} table, verifies it, and
     * then tears the table down again.
     *
     * <p>Because the table is dropped before the commit, nothing inserted
     * here survives the call.
     *
     * @param link_id value for the {@code link_id} column
     * @param word_id value for the {@code word_id} column
     * @param link    value for the {@code link} column (declared varchar(80))
     * @param title   value for the {@code title} column (declared varchar(40))
     * @param Snippet value for the {@code snippet} column (declared varchar(150))
     */
    public void go(int link_id,int word_id,String link ,String title,String Snippet)
    {
        System.out.println("DBworks starting in " + framework + " mode");
        loadDriver();

        Connection connection = null;
        // Every Statement/PreparedStatement opened below, closed in the finally block.
        ArrayList<Statement> statements = new ArrayList<Statement>();
        ResultSet rs = null;
        try
        {
            Properties props = new Properties();
            props.put("user", "user1");
            props.put("password", "user1");
            String dbName = "crawler";

            connection = DriverManager.getConnection(protocol + dbName
                    + ";create=true", props);
            System.out.println("Connected to and created database " + dbName);

            connection.setAutoCommit(false);

            Statement s = connection.createStatement();
            statements.add(s);

            // Create the table up front; querying it first would throw on a
            // fresh database where the table does not exist yet.
            createLinksTableIfMissing(s);

            PreparedStatement psInsert =
                    connection.prepareStatement("insert into links values (?, ? ,? ,?,?)");
            statements.add(psInsert);

            // Column order follows the CREATE TABLE: link_id, word_id, link, title, snippet.
            psInsert.setInt(1, link_id);
            psInsert.setInt(2, word_id);
            psInsert.setString(3, link);
            psInsert.setString(4, title);
            psInsert.setString(5, Snippet);
            psInsert.executeUpdate();
            System.out.println("Inserted link " + link_id);

            rs = s.executeQuery("SELECT link_id,word_id,title ,snippet,link  FROM links ORDER BY link_id");
            boolean failure = false;
            if (!rs.next())
            {
                failure = true;
                reportFailure("No Data In Database");
            }
            else
            {
                // Only read the row when there is one; getInt on an exhausted
                // result set would raise a second, misleading SQLException.
                int number = rs.getInt(1);
                if (number != link_id)
                {
                    failure = true;
                    reportFailure(
                            "Wrong row returned, expected link_id=" + link_id + ", got " + number);
                }
            }

            if (!failure) {
                System.out.println("Verified the rows");
            }

            s.execute("drop table links");
            System.out.println("Dropped table links");

            connection.commit();
            System.out.println("Committed the transaction");

            if (framework.equals("embedded"))
            {
                shutdownEmbeddedDerby();
            }
        }
        catch (SQLException sqle)
        {
            printSQLException(sqle);
        } finally {
            // release all open resources to avoid unnecessary memory usage

            // ResultSet
            try {
                if (rs != null) {
                    rs.close();
                }
            } catch (SQLException sqle) {
                printSQLException(sqle);
            }

            // Statements and PreparedStatements, in the order they were opened
            for (Statement st : statements) {
                try {
                    if (st != null) {
                        st.close();
                    }
                } catch (SQLException sqle) {
                    printSQLException(sqle);
                }
            }
            statements.clear();

            // Connection
            try {
                if (connection != null) {
                    connection.close();
                }
            } catch (SQLException sqle) {
                printSQLException(sqle);
            }
        }
    }

    /**
     * Creates the {@code links} table, treating "already exists" as success.
     *
     * @param s an open statement on the target connection
     * @throws SQLException for any failure other than the table already existing
     */
    private void createLinksTableIfMissing(Statement s) throws SQLException
    {
        try
        {
            s.execute("create table links(link_id int,word_id int,link varchar(80),title  varchar(40), snippet varchar(150))");
            System.out.println("Created table links");
        }
        catch (SQLException sqle)
        {
            if (!OBJECT_ALREADY_EXISTS.equals(sqle.getSQLState()))
            {
                throw sqle;
            }
        }
    }

    /**
     * Requests a shutdown of the embedded Derby engine. The shutdown URL
     * always answers with an SQLException; error code 50000 together with
     * SQLState XJ015 is reported as the normal outcome, anything else is
     * printed as a failure.
     */
    private void shutdownEmbeddedDerby()
    {
        try
        {
            DriverManager.getConnection("jdbc:derby:;shutdown=true");
        }
        catch (SQLException se)
        {
            if (se.getErrorCode() == 50000 && "XJ015".equals(se.getSQLState()))
            {
                System.out.println("Derby shut down normally");
            }
            else
            {
                System.err.println("Derby did not shut down normally");
                printSQLException(se);
            }
        }
    }

    /**
     * Loads and instantiates the JDBC driver class named by {@code driver}.
     * Failures are printed to {@code System.err} and otherwise ignored, so a
     * missing driver surfaces later as a connection error.
     */
    private void loadDriver() {
        try {
            Class.forName(driver).newInstance();
            System.out.println("Loaded the appropriate driver");
        } catch (ClassNotFoundException cnfe) {
            System.err.println("\nUnable to load the JDBC driver " + driver);
            System.err.println("Please check your CLASSPATH.");
            cnfe.printStackTrace(System.err);
        } catch (InstantiationException ie) {
            System.err.println(
                        "\nUnable to instantiate the JDBC driver " + driver);
            ie.printStackTrace(System.err);
        } catch (IllegalAccessException iae) {
            System.err.println(
                        "\nNot allowed to access the JDBC driver " + driver);
            iae.printStackTrace(System.err);
        }
    }

    /**
     * Prints a data-verification failure to {@code System.err}.
     *
     * @param message description of what did not match
     */
    private void reportFailure(String message) {
        System.err.println("\nData verification failed:");
        System.err.println('\t' + message);
    }

    /**
     * Prints the SQLState, error code and message of {@code e} and of every
     * exception chained to it via {@link SQLException#getNextException()}.
     *
     * @param e head of the exception chain; {@code null} prints nothing
     */
    public static void printSQLException(SQLException e)
    {
        while (e != null)
        {
            System.err.println("\n----- SQLException -----");
            System.err.println("  SQL State:  " + e.getSQLState());
            System.err.println("  Error Code: " + e.getErrorCode());
            System.err.println("  Message:    " + e.getMessage());
            e = e.getNextException();
        }
    }
}

Class Crawler (contains the main method)

 

    import java.awt.*;
    import java.awt.event.*;
    import java.util.*;
    import java.net.*;
    import java.io.*;
    import javax.swing.*;
  
    import java.net.URL;
    import java.net.MalformedURLException;
    import java.util.regex.Pattern;
    import java.util.regex.Matcher;

    public class Crawler extends conn
    {
         public static void main(String [] args)
         {
              JFrame frame = new EditorPaneFrame();
              frame.show();
                        
         }
    }
    class EditorPaneFrame extends JFrame
    {
       
         conn connection = new conn();
         private JTextField url;
         private JButton loadButton;
         private JButton backButton;
         private JEditorPane editorPane;
         private Stack urlStack = new Stack();
    
    
         public EditorPaneFrame()
         {
              setTitle("Web Crawler");
              setSize(600,400);
              addWindowListener(new WindowAdapter()
              {
                   public void windowClosing(WindowEvent e)
                   {
                        System.exit(0);
                   }
          } );
    
             
              // set up text field and load button for typing in URL
    
             url = new JTextField(30);
    
              loadButton = new JButton("Search");
              loadButton.addActionListener(new ActionListener()
              {
                   public void actionPerformed(ActionEvent event)
                   {  
                         try
                         {
String  search = "https://www.googleapis.com/customsearch/v1?key=YPUR-GOOGLE-API-KEY&cx=013036536707430787589:_pqjad5hr1a&q="+url.getText()+"&alt=json";
                    
                          URL url = new URL(search);
                          URLConnection link = url.openConnection();
                           
                          BufferedReader reader = new BufferedReader( new InputStreamReader(link.getInputStream()));
                           Pattern linkPattern = Pattern.compile("[\"]link[\"]:.*");
                           Pattern titlePattern = Pattern.compile("[\"]title[\"]:.*");
                           Pattern snippetPattern = Pattern.compile("[\"]snippet[\"]:.*");
                                                                
                          String line;
                          String newlink;
                           String title;
                            String snippet;
                             while ((line =reader.readLine()) != null)
                             {  Matcher l = linkPattern.matcher(line);
                                Matcher t =titlePattern.matcher(line);
                                Matcher s = snippetPattern.matcher(line);
                                while(l.find())
                                {   newlink=l.group();
                                    newlink=newlink.replaceAll("[\"]link[\"]: [\"]", "");
                                  System.out.println(newlink);
                                
                                }
                                while(t.find())
                                {  title=t.group();
                                    title=title.replaceAll("[\"]title[\"]: [\"]", "");
                                  System.out.println(title);
                                
                                }
                                while(s.find())
                                {  snippet=s.group();
                                    snippet=snippet.replaceAll("[\"]snippet[\"]: [\"]", "");
                                  System.out.println(snippet);                                
                                } 
                  
                             }                             
                             reader.close();                           
                         }                      
                     catch (MalformedURLException e)
                              {
                                  e.printStackTrace();
                              }
                     catch (IOException e)
                             {
                                  e.printStackTrace();
                             }                                         
    };
   });
    
              // set up back button and button action
    
              backButton = new JButton("Back");
              backButton.addActionListener(new ActionListener()
              {
                   public void actionPerformed(ActionEvent event)
                   {
                        if(urlStack.size()<=1) return;
                        try
                        {
                             urlStack.pop();
                             String urlString = (String)urlStack.peek();
                             url.setText(urlString);
                             editorPane.setPage(urlString);
                        }
                        catch(IOException e)
                        {
                             editorPane.setText("Error : " +e);
                        }
                   }
              });
    
              editorPane = new JEditorPane();
              editorPane.setEditable(false); 
              Container contentPane = getContentPane();
              contentPane.add(new JScrollPane(editorPane), "Center");
              JPanel panel = new JPanel();
              panel.add(new JLabel("Search Term"));
              panel.add(url);
              panel.add(loadButton);
              panel.add(backButton);
    
              contentPane.add(panel,"South");
         }
    
    }