package info.ephyra.util;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
/**
* Determines the page rank of a URL.
*
* @author Manas Pathak
* @version 2008-02-10
*/
public class PageRankParser {
    /** Value returned by {@link #getPageRank(String)} when no rank could be determined. */
    private static final int UNKNOWN_RANK = -1;

    /** Character encoding used for the query string and for decoding the response. */
    private static final String ENCODING = "UTF-8";

    /**
     * Builds the URL that is queried to obtain the rank of the given URL.
     *
     * @param url URL whose rank is requested
     * @return the query URL, or <code>null</code> if <code>url</code> is not
     *         accepted by {@link #validUrl(String)} or could not be encoded
     */
    private static String getQueryUrl(String url) {
        // Validate explicitly instead of using a checksum of 0 as an "invalid"
        // marker, so that a valid URL whose checksum happens to be 0 still works.
        if (!validUrl(url)) {
            return null;
        }
        int checksum = generateCheckSum(strord("info:" + url));

        String encoded;
        try {
            encoded = URLEncoder.encode(url, ENCODING);
        } catch (UnsupportedEncodingException ex) {
            // Every Java platform is required to support UTF-8, so this is not
            // expected to happen; never send a query with an empty "q" parameter.
            return null;
        }

        // NOTE(review): the checksum is appended as a signed decimal and may
        // therefore carry a minus sign - confirm this is what the service expects.
        return "http://www.google.com/search?client=navclient-auto&ch=6" + checksum
                + "&ie=UTF-8&oe=UTF-8&features=Rank" + "&q=info:" + encoded;
    }

    /**
     * Converts a string into an array holding the numeric value of each of
     * its characters.
     *
     * @param str string to convert
     * @return array with one entry per character, in order
     */
    private static int[] strord(String str) {
        int[] result = new int[str.length()];
        for (int i = 0; i < result.length; i++) {
            result[i] = str.charAt(i);
        }
        return result;
    }

    /**
     * Shifts <code>a</code> right by <code>b</code> bits, filling the vacated
     * high-order bits with zeros.
     *
     * @param a value to shift
     * @param b shift distance; all callers in this class pass a value in 1..31
     * @return the shifted value
     */
    private static int zeroFill(int a, int b) {
        // Java's unsigned right shift does exactly what the former manual
        // sign-bit masking did for shift distances between 1 and 31.
        return a >>> b;
    }

    /**
     * Mixes three 32-bit values through a fixed sequence of subtractions,
     * shifts and XORs.
     *
     * @param a first value
     * @param b second value
     * @param c third value
     * @return a new array <code>{a, b, c}</code> holding the mixed values
     */
    private static int[] mix(int a, int b, int c) {
        a -= b;
        a -= c;
        a ^= zeroFill(c, 13);
        b -= c;
        b -= a;
        b ^= a << 8;
        c -= a;
        c -= b;
        c ^= zeroFill(b, 13);

        a -= b;
        a -= c;
        a ^= zeroFill(c, 12);
        b -= c;
        b -= a;
        b ^= a << 16;
        c -= a;
        c -= b;
        c ^= zeroFill(b, 5);

        a -= b;
        a -= c;
        a ^= zeroFill(c, 3);
        b -= c;
        b -= a;
        b ^= a << 10;
        c -= a;
        c -= b;
        c ^= zeroFill(b, 15);

        return new int[] {a, b, c};
    }

    /**
     * Computes the checksum that is sent along with a rank query.
     *
     * The input is consumed in groups of 12 values: each group is packed into
     * three 32-bit words (least significant value first), added to the running
     * state and mixed. The remaining 0 to 11 values are folded in afterwards,
     * together with the total length, followed by a final mix.
     *
     * @param url character values of the string to hash
     * @return the third word of the final mixed state
     */
    private static int generateCheckSum(int[] url) {
        int length = url.length;
        int a = 0x9e3779b9;
        int b = 0x9e3779b9;
        int c = 0xe6359a60;
        int k = 0;
        int len;

        // full groups of 12 values
        for (len = length; len >= 12; len -= 12) {
            a += url[k + 0] + (url[k + 1] << 8) + (url[k + 2] << 16) + (url[k + 3] << 24);
            b += url[k + 4] + (url[k + 5] << 8) + (url[k + 6] << 16) + (url[k + 7] << 24);
            c += url[k + 8] + (url[k + 9] << 8) + (url[k + 10] << 16) + (url[k + 11] << 24);
            int[] mixed = mix(a, b, c);
            a = mixed[0];
            b = mixed[1];
            c = mixed[2];
            k += 12;
        }

        c += length;

        // Remaining 0..11 values. Every case deliberately falls through to the
        // ones below it. The lowest byte of c is not touched here; it only
        // receives the length added above.
        switch (len) {
            case 11:
                c += url[k + 10] << 24;
                // fall through
            case 10:
                c += url[k + 9] << 16;
                // fall through
            case 9:
                c += url[k + 8] << 8;
                // fall through
            case 8:
                b += url[k + 7] << 24;
                // fall through
            case 7:
                b += url[k + 6] << 16;
                // fall through
            case 6:
                b += url[k + 5] << 8;
                // fall through
            case 5:
                b += url[k + 4];
                // fall through
            case 4:
                a += url[k + 3] << 24;
                // fall through
            case 3:
                a += url[k + 2] << 16;
                // fall through
            case 2:
                a += url[k + 1] << 8;
                // fall through
            case 1:
                a += url[k + 0];
                break;
            default:
                break;
        }

        return mix(a, b, c)[2];
    }

    /**
     * Checks whether a string can be used as the subject of a rank query.
     *
     * @param url string to check
     * @return <code>true</code> iff <code>url</code> is not <code>null</code>,
     *         starts with "http" and is accepted by the {@link URL} constructor
     */
    private static boolean validUrl(String url) {
        if (url == null || !url.startsWith("http")) {
            return false;
        }
        try {
            new URL(url);
            return true;
        } catch (MalformedURLException e) {
            return false;
        }
    }

    /**
     * Determines the page rank of a URL by sending a query over the network
     * and parsing the response.
     *
     * Each response line is split at ':'; whenever a line has more than two
     * fields, the third field is parsed as the rank. The lookup is best
     * effort: I/O failures and unparsable responses do not raise an exception.
     *
     * @param url URL whose rank is requested
     * @return the last rank parsed from the response, or <code>-1</code> if
     *         the URL is invalid, the request failed or no rank was found
     */
    public static int getPageRank(String url) {
        String query = getQueryUrl(url);
        if (query == null) {
            return UNKNOWN_RANK;
        }

        int pageRank = UNKNOWN_RANK;
        BufferedReader in = null;
        try {
            URLConnection conn = new URL(query).openConnection();
            // The request asks for UTF-8 output ("oe=UTF-8"), so decode the
            // response as UTF-8 instead of relying on the platform default.
            in = new BufferedReader(new InputStreamReader(conn.getInputStream(), ENCODING));
            String line;
            while ((line = in.readLine()) != null) {
                String[] tokens = line.split(":");
                if (tokens.length > 2) {
                    pageRank = Integer.parseInt(tokens[2]);
                }
            }
        } catch (IOException ignored) {
            // best effort: network problems leave the rank at its current value
        } catch (NumberFormatException ignored) {
            // best effort: stop at the first line whose third field is not an
            // integer and keep whatever was parsed before it
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // nothing sensible can be done if closing fails
                }
            }
        }
        return pageRank;
    }

    /**
     * Prints the page rank of the URL given as the first command line argument.
     *
     * @param args command line arguments; <code>args[0]</code> is the URL
     */
    public static void main(String[] args) {
        if (args == null || args.length == 0) {
            System.err.println("Usage: java info.ephyra.util.PageRankParser <url>");
            return;
        }
        System.out.println(getPageRank(args[0]));
    }
}