/* * * This Class crawls the specified URL page *package javacrawlersystem;/** * * @author Arpit.Sharma */ import java.io.BufferedReader; import java.io.DataInputStream; import java.io.File; import java.io.FileOutputStream; import java.io.InputStreamReader; import java.net.URL; import java.net.URLConnection; import java.util.ArrayList; import java.util.Iterator; public class HTMLCodeDownloader {/** * @param args the command line arguments */ public static void main(String[] args) {// TODO code application logic here try { // This creates the test.txt file File newFile=new File("C:/Documents and Settings/Usual Account/Desktop/test3.html"); // Here we give the URL for the Crawler URL url = new URL("http://cl.thapar.edu/library_qp05.html"); FileOutputStream fop=new FileOutputStream(newFile);// ArrayList<String> htmlLine=new ArrayList<String>(); System.setProperty("http.proxyHost",""); System.setProperty("http.proxyPort", ""); URLConnection conn = url.openConnection(); DataInputStream in = new DataInputStream ( conn.getInputStream ( ) ) ; BufferedReader d = new BufferedReader(new InputStreamReader(in)); while(d.ready()) { //System.out.println(d.readLine()); htmlLine.add(d.readLine()+"\n"); } Iterator itr=htmlLine.iterator(); while(itr.hasNext()) { fop.write(((String)(itr.next())).getBytes()); } fop.flush(); fop.close(); } catch(Exception e) { System.out.println(e); } } }