package student.web.internal.tests;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import student.web.internal.WebContent;
public class Foo
{
public static void main(String[] args)
throws Exception
{
student.web.TurboWebBot bot = new student.web.TurboWebBot(
"http://localhost/bottest.html");
// cs1705.web.TurboWebBot bot = new cs1705.web.TurboWebBot(
// "http://www.dailymail.co.uk/textbased/channel-1/index.html");
// new cs1705.web.WebBot("http://news.bbc.co.uk/text_only.stm");
bot.resetElementsOfInterest("a");
while (!bot.isLookingAtEndOfPage())
{
bot.advanceToNextLink();
if (bot.isLookingAtLink())
{
bot.out().println("link: url = " + bot.getLinkURI()
+ ", text = " + bot.getCurrentElementText()
+ ", element = " + bot.getCurrentElement());
}
}
}
public static void main1(String[] args)
throws Exception
{
URL url = new URL("http://news.search.yahoo.com/news/rss?p=obama");
URLConnection c = url.openConnection();
HttpURLConnection connection = (HttpURLConnection)c;
connection.setRequestProperty("User-Agent",
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)");
// connection.setRequestProperty("Cookie", "B=5lukv694tkmcr&b=3&s=ic");
connection.connect();
System.out.println("follow redirects = " + HttpURLConnection.getFollowRedirects());;
System.out.println("response code = " + connection.getResponseCode());
System.out.println("header fields = " + connection.getHeaderFields());
String headerName = null;
for (int i = 1; (headerName = connection.getHeaderFieldKey(i)) != null; i++)
{
System.out.println(" " + headerName + " = " + connection.getHeaderField(i));
}
if (connection.getResponseCode() == 999 && connection.getErrorStream() != null)
{
try
{
Scanner in = new Scanner(connection.getErrorStream());
in.useDelimiter("\\z");
StringBuffer sb = new StringBuffer(4096);
while (in.hasNext())
{
sb.append(in.next());
}
in.close();
Pattern p = Pattern.compile("<a href=\"([^\"]*)\">let us know</a>");
Matcher m = p.matcher(sb.toString());
if (m.find())
{
String cookieUrl = m.group(1);
System.out.println("error url = " + cookieUrl);
URLConnection errConnection =
(new URL(cookieUrl)).openConnection();
errConnection.connect();
Map<String, List<String>> headers =
connection.getHeaderFields();
List<String> responseCookies = headers.get("Set-Cookie");
if (responseCookies != null)
{
for (String cookieVal : responseCookies)
{
String[] segments = cookieVal.split("\\s*;\\s*");
if (segments != null && segments.length >= 1)
{
for (String segment : segments)
{
if (segment.toLowerCase()
.startsWith("domain="))
{
String host = segment.substring(
"domain=".length());
System.out.println("cookie = " + segments[0]);
System.out.println("host = " + host);
break;
}
}
}
}
}
}
}
catch (Exception e)
{
e.printStackTrace();
}
}
// if (true) return;
// try
// {
//
//
// URL url =
// new URL("http://news.search.yahoo.com/news/rss?p=nuclear+iran");
// System.out.println("'" + url.getHost() + "'");
//
// long before = System.currentTimeMillis();
// String yahooContent = WebContent.get(url);
// long after = System.currentTimeMillis();
//
// System.out.println("read time: " + (after - before) + " ms");
//
// before = System.currentTimeMillis();
// WebContent.get("http://www.vt.edu/");
// after = System.currentTimeMillis();
//
// System.out.println("read time: " + (after - before) + " ms");
// System.out.println(yahooContent);
// }
// catch (Exception e)
// {
// e.printStackTrace();
// }
}
}