/**
* Copyright 2008 - CommonCrawl Foundation
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
**/
package org.commoncrawl.io;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.util.ArrayList;
import org.commoncrawl.util.GZIPUtils;
import org.commoncrawl.async.EventLoop;
import org.commoncrawl.async.Timer;
import org.commoncrawl.async.Timer.Callback;
import org.commoncrawl.io.NIOHttpConnection.State;
import org.commoncrawl.util.GZIPUtils.UnzipResult;
import org.commoncrawl.util.HttpCookieUtils.CookieStore;
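/**
* Command-line driver that exercises {@link NIOHttpConnection}: it fetches the
* URLs given on the command line through a fixed number of connection slots,
* dumps the body of each successful response as hex, and prints per-connection
* and aggregate statistics once every URL has been processed.
*
* @author rana
*
*/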
public class NIOHttpConnectionUnitTest implements NIOHttpConnection.Listener {
// Number of connections that reached DONE or ERROR (see HttpConnectionStateChanged).
private int finishCount = 0;
// Number of connections that reached DONE.
private int successCount = 0;
// Number of connections that reached ERROR.
private int failureCount = 0;
// Number of connection slots currently in use: incremented when run() starts a
// connection, decremented when it finishes or fails to open.
private int connectionCount = 0;
// Incremented when a connection enters AWAITING_RESOLUTION, decremented when a
// connection finishes directly from that state.
private int resolvingCount = 0;
// Value passed to the constructor; not read anywhere else in this class.
private int maxCount = 0;
// Value passed to the constructor; the same value sizes the connections array.
private int socketMax = 0;
// Number of timer callbacks that found work remaining (see run()).
private long loopCounter = 0;
// Cookie store handed to every NIOHttpConnection created by run().
private CookieStore cookieStore = new CookieStore();
// Event loop created in the constructor; supplies the selector, resolver and timer.
private EventLoop eventLoop;;
// URLs still waiting to be fetched; run() removes entries from the front.
private ArrayList<String> urlList = null;
// Number of input bytes rendered on each output line of dumpAsHex.
private static final int HEX_CHARS_PER_LINE = 32;
/**
 * Produces a compact label for an exception: the exception's class name with
 * its package prefix removed, followed by {@code ": "} and the exception's
 * message when it has one.
 *
 * <p>The package prefix is stripped from the class name only. Searching the
 * whole {@code toString()} output for the last dot would also match dots
 * inside the message (host names, IP addresses, file names) and discard
 * nearly all of the text.
 *
 * @param e the exception to describe, may be {@code null}
 * @return the short description, or an empty string when {@code e} is {@code null}
 */
private static String formatException(Exception e) {
  if (e == null) {
    return "";
  }
  String className = e.getClass().getName();
  String shortName = className.substring(className.lastIndexOf('.') + 1);
  // Throwable.toString() is built from getLocalizedMessage(); use the same source.
  String message = e.getLocalizedMessage();
  return (message == null) ? shortName : shortName + ": " + message;
}
/**
 * Command-line entry point. Arguments are read as {@code --name value} pairs:
 * <ul>
 * <li>{@code --url <url>} adds a single URL to the work list</li>
 * <li>{@code --urls <resource>} loads URLs, one per line, from a resource
 * located via {@link ClassLoader#getSystemResource(String)}</li>
 * <li>{@code --socketmax <n>} sets the number of connection slots (minimum 1, default 100)</li>
 * <li>{@code --maxcount <n>} caps the number of lines read from each
 * {@code --urls} resource (minimum 1, default 1000)</li>
 * </ul>
 * Prints the usage string and exits with -1 when no URL was supplied (this
 * includes the case of an odd number of arguments, which are ignored) or when
 * a numeric option cannot be parsed. Otherwise runs the test and exits with 0.
 *
 * @param args command-line arguments
 */
public static void main(String[] args) {
  // set the default ccbot user agent string
  NIOHttpConnection.setDefaultUserAgentString("CCBot/1.0 (+http://www.commoncrawl.org/bot.html)");
  String usage = "Usage: --url <singleurl> --urls <urlsfile> --socketmax <max simulataneous sockets> --maxcount <max urls>";
  int socketMax = 100;
  int maxCount = 1000;
  ArrayList<String> urlList = new ArrayList<String>();
  if (args.length % 2 == 0) {
    // Pass 1: numeric options. They are parsed before any URL file is loaded so
    // that --maxcount applies regardless of where it appears on the command line.
    try {
      for (int i = 0; i < args.length; i += 2) {
        if (args[i].equals("--socketmax")) {
          socketMax = Math.max(1, Integer.parseInt(args[i + 1]));
        } else if (args[i].equals("--maxcount")) {
          maxCount = Math.max(1, Integer.parseInt(args[i + 1]));
        }
      }
    } catch (NumberFormatException e) {
      System.out.println("Invalid numeric argument. " + e.getMessage());
      System.out.println(usage);
      System.exit(-1);
    }
    // Pass 2: URL sources, processed in command-line order.
    for (int i = 0; i < args.length; i += 2) {
      String argName = args[i];
      String value = args[i + 1];
      if (argName.equals("--urls")) {
        try {
          System.out.println("Attempting to load URL List from Path:" + value);
          URL resourceURL = ClassLoader.getSystemResource(value);
          if (resourceURL == null) {
            // Report the requested path; resourceURL itself is null here.
            throw new FileNotFoundException(value);
          }
          BufferedReader reader = new BufferedReader(new InputStreamReader(resourceURL.openStream()));
          int urlCount = 0;
          try {
            String line;
            while ((line = reader.readLine()) != null) {
              urlList.add(line);
              if (++urlCount == maxCount) {
                break;
              }
            }
          } finally {
            // Closing the reader also closes the underlying resource stream.
            reader.close();
          }
          System.out.println("Loaded URLS from URL List. Count:" + urlCount);
        } catch (IOException e) {
          System.out.println(e.toString());
          e.printStackTrace();
          System.exit(-1);
        }
      } else if (argName.equals("--url")) {
        System.out.println("Adding Single URL to URL List:" + value);
        urlList.add(value);
      }
    }
  }
  if (urlList.size() == 0) {
    System.out.println(usage);
    System.exit(-1);
  } else {
    NIOHttpConnectionUnitTest unitTest = new NIOHttpConnectionUnitTest(urlList, socketMax, maxCount);
    unitTest.run();
    System.exit(0);
  }
}
// One entry per connection slot (allocated with socketMax entries in the
// constructor); a null entry marks a free slot.
NIOHttpConnection connections[] = null;
// Connections that reached DONE or ERROR, kept for the statistics printed by run().
private ArrayList<NIOHttpConnection> closedConnections = new ArrayList<NIOHttpConnection>();
/**
 * Creates a test driver with its own event loop and an empty table of
 * connection slots.
 *
 * @param urlList the URLs to fetch; entries are consumed as connections are started
 * @param socketMax number of connection slots to allocate
 * @param maxCount URL limit chosen on the command line (stored only)
 */
private NIOHttpConnectionUnitTest(ArrayList<String> urlList, int socketMax, int maxCount) {
  eventLoop = new EventLoop();
  this.urlList = urlList;
  this.socketMax = socketMax;
  this.maxCount = maxCount;
  connections = new NIOHttpConnection[socketMax];
}
/**
 * Renders a byte array as a hex dump, {@code HEX_CHARS_PER_LINE} bytes per line.
 *
 * <p>Each line holds the bytes as two lowercase hex digits followed by a
 * space, then one more space, then a text column, then a newline. The hex
 * column of a short final line is padded with spaces so the text column stays
 * aligned. In the text column, bytes below 0x20 are shown as {@code '.'},
 * bytes from 0x20 to 0x7E as the corresponding ASCII character, and all other
 * bytes as {@code '?'}.
 *
 * @param data the bytes to render; {@code null} is treated as empty
 * @return the formatted dump, or an empty string when there is nothing to render
 */
public String dumpAsHex(byte[] data) {
  if (data == null || data.length == 0) {
    return "";
  }
  StringBuilder out = new StringBuilder(Math.max(16, data.length));
  StringBuilder ascii = new StringBuilder(HEX_CHARS_PER_LINE);
  for (int lineStart = 0; lineStart < data.length; lineStart += HEX_CHARS_PER_LINE) {
    int lineEnd = Math.min(lineStart + HEX_CHARS_PER_LINE, data.length);
    // The text column is rebuilt per line so nothing from a previous line leaks through.
    ascii.setLength(0);
    for (int i = lineStart; i < lineEnd; i++) {
      int value = data[i] & 0xFF;
      out.append(Character.forDigit(value >>> 4, 16));
      out.append(Character.forDigit(value & 0x0F, 16));
      out.append(' ');
      // Classify by the byte value (not by the column index).
      if (value < 0x20) {
        ascii.append('.');
      } else if (value < 0x7F) {
        ascii.append((char) value);
      } else {
        ascii.append('?');
      }
    }
    // Pad a short final line so the text column lines up with the full lines.
    for (int column = lineEnd - lineStart; column < HEX_CHARS_PER_LINE; column++) {
      out.append("   ");
    }
    out.append(' ').append(ascii).append('\n');
  }
  return out.toString();
}
/**
 * Decodes the given bytes with the "ASCII" charset and returns the result as
 * a string.
 *
 * @param data the bytes to decode
 * @return the decoded text
 */
public String dumpAsText(byte[] data) {
  final CharSequence decoded = Charset.forName("ASCII").decode(ByteBuffer.wrap(data));
  return decoded.toString();
}
/**
 * Writes the hex dump of the given bytes to standard output.
 *
 * @param data the bytes to dump
 */
public void dumpContent(byte[] data) {
  final String hexDump = dumpAsHex(data);
  System.out.print(hexDump);
}
/**
 * Listener callback invoked when a connection changes state.
 *
 * <p>Entering {@code AWAITING_RESOLUTION} increments the resolving counter.
 * Reaching {@code DONE} or {@code ERROR} updates the finish/success/failure
 * counters, dumps the response body of a successful connection as hex, frees
 * the connection's slot and records the connection for the final report.
 * Every other transition is ignored.
 *
 * @param theConnection the connection whose state changed
 * @param oldState the state the connection was in before the change
 * @param state the state the connection is in now
 */
public void HttpConnectionStateChanged(NIOHttpConnection theConnection, State oldState, State state) {
  if (state == State.AWAITING_RESOLUTION) {
    resolvingCount++;
    return;
  }
  if (state != State.DONE && state != State.ERROR) {
    return;
  }
  if (oldState == State.AWAITING_RESOLUTION) {
    // The connection finished without ever leaving the resolving state.
    resolvingCount--;
  }
  finishCount++;
  if (state == State.DONE) {
    successCount++;
    System.out.println("Connection:" + theConnection.getURL() + " State == DONE. Content Length:"
        + theConnection.getContentLength() + " Content Buffer Size:"
        + theConnection.getContentBuffer().available());
    if (theConnection.getContentBuffer().available() != 0) {
      dumpResponseContent(theConnection);
    }
  } else {
    failureCount++;
  }
  // Free the slot this connection occupied so run() can reuse it.
  for (int i = 0; i < connections.length; ++i) {
    if (connections[i] == theConnection) {
      connections[i] = null;
      connectionCount--;
      break;
    }
  }
  closedConnections.add(theConnection);
}

/**
 * Hex-dumps the buffered response body of a finished connection, replacing it
 * with its unzipped form first when the response declares
 * {@code Content-Encoding: gzip}. Failures are printed and otherwise ignored.
 */
private void dumpResponseContent(NIOHttpConnection theConnection) {
  NIOBufferList contentBuffer = theConnection.getContentBuffer();
  try {
    // now check headers to see if it is gzip encoded
    int keyIndex = theConnection.getResponseHeaders().getKey("Content-Encoding");
    if (keyIndex != -1) {
      String encoding = theConnection.getResponseHeaders().getValue(keyIndex);
      // Drain the buffer only for gzip. Reading it for any other encoding would
      // consume the body and leave nothing for the dump below.
      if ("gzip".equalsIgnoreCase(encoding)) {
        byte compressed[] = new byte[contentBuffer.available()];
        contentBuffer.read(compressed);
        UnzipResult result = GZIPUtils.unzipBestEffort(compressed, 1024000);
        contentBuffer.reset();
        contentBuffer.write(result.data.get(), result.data.getOffset(), result.data.getCount());
        contentBuffer.flush();
        // NOTE(review): this prints the length of the compressed input, not of the
        // unzipped output; confirm which one the label is meant to report.
        System.out.println("GUnzip Content Size:" + compressed.length);
      }
    }
    byte data[] = new byte[contentBuffer.available()];
    contentBuffer.read(data);
    dumpContent(data);
  } catch (Exception e) {
    // Best effort: a failed dump must not prevent the slot from being released.
    System.out.println(e);
    e.printStackTrace();
  }
}
/**
 * Listener callback carrying a connection and its content buffer. This
 * implementation does nothing; the buffered content is read in
 * HttpConnectionStateChanged once the connection reaches DONE.
 *
 * @param theConnection the connection the content belongs to
 * @param contentBuffer the connection's content buffer
 */
public void HttpContentAvailable(NIOHttpConnection theConnection, NIOBufferList contentBuffer) {
// Nothing to do here.
}
/**
 * Runs the test: starts the event loop, installs a timer whose callback keeps
 * the connection slots filled from the URL list, waits for the event loop
 * thread to end, and then prints the statistics report.
 */
private final void run() {
  long startTime = System.currentTimeMillis();
  eventLoop.start();
  // presumably a repeating timer with a 1000 ms period; confirm against Timer
  eventLoop.setTimer(new Timer(1000, true, new Callback() {
    public void timerFired(Timer timer) {
      if (urlList.size() == 0 && connectionCount == 0) {
        // Nothing pending and nothing in flight: shut the loop down.
        eventLoop.stop();
      } else {
        NIOHttpConnectionUnitTest.this.fillConnectionSlots();
        loopCounter++;
        NIOHttpConnectionUnitTest.this.printSocketStates();
      }
    }
  }));
  // wait on the event loop thread ...
  try {
    eventLoop.getEventThread().join();
  } catch (InterruptedException e) {
    e.printStackTrace();
    // Restore the interrupt flag so callers can still observe the interruption.
    Thread.currentThread().interrupt();
  }
  long totalTime = System.currentTimeMillis() - startTime;
  printReport(totalTime);
}

/**
 * Starts a connection for the next pending URL in every slot that is free or
 * whose connection reports a timeout.
 */
private void fillConnectionSlots() {
  for (int j = 0; j < connections.length; ++j) {
    if (connections[j] == null || connections[j].checkForTimeout()) {
      if (urlList.size() != 0) {
        connectionCount++;
        String url = urlList.remove(0);
        try {
          connections[j] = new NIOHttpConnection(new URL(url), eventLoop.getSelector(),
              eventLoop.getResolver(), cookieStore);
          connections[j].setDownloadMax(1024000);
          // NOTE(review): the listener is attached after open(); confirm that open()
          // cannot fire a state change before the listener is registered.
          connections[j].open();
          connections[j].setListener(this);
        } catch (IOException e) {
          System.out.println("Error Opening Connection for URL:" + url);
          connections[j] = null;
          connectionCount--;
        }
      }
    }
  }
}

/** Prints one line listing the URL and state of every connection slot. */
private void printSocketStates() {
  System.out.print("SocketStates:");
  for (int i = 0; i < connections.length; ++i) {
    if (connections[i] == null) {
      System.out.print("null,");
    } else {
      System.out.print(connections[i].getURL() + ":" + connections[i].getState().toString() + ",");
    }
  }
  System.out.print("\n");
}

/**
 * Prints the per-connection table for all closed connections followed by the
 * aggregate totals.
 *
 * @param totalTime wall-clock duration of the run in milliseconds
 */
private void printReport(long totalTime) {
  System.out.print("\n");
  System.out.println("Stats:");
  System.out.format("%1$30.30s", "URL");
  System.out.format("%1$10s", "STATE");
  System.out.format(" %1$40.40s", "Result");
  System.out.format(" %1$10.10s", "HTTPCode");
  System.out.format("%1$10s", "Resolve");
  System.out.format("%1$10s", "Connect");
  System.out.format("%1$10s", "Upload");
  System.out.format("%1$10s", "Download");
  System.out.format("%1$20s\n", "Size");
  long totalBytes = 0;
  for (NIOHttpConnection connection : closedConnections) {
    System.out.format("%1$30.30s", connection.getURL());
    System.out.format("%1$10s", connection.getState());
    if (connection.getState() == State.ERROR) {
      System.out.format(" %1$40.40s", formatException(connection.getLastException()));
      System.out.format(" %1$10.10s", "-1");
    } else {
      System.out.format(" %1$40.40s", connection.getResponseHeaders().getValue(0));
      System.out.format(" %1$10.10s", NIOHttpConnection.getHttpResponseCode(connection.getResponseHeaders()));
    }
    System.out.format("%1$10s", connection.getResolveTime());
    System.out.format("%1$10s", connection.getConnectTime());
    System.out.format("%1$10s", connection.getUploadTime());
    System.out.format("%1$10s", connection.getDownloadTime());
    System.out.format("%1$20s\n", connection.getDownloadLength());
    // Only successful downloads count towards the throughput figure.
    if (connection.getState() == State.DONE) {
      totalBytes += connection.getDownloadLength();
    }
  }
  // Bytes per millisecond equals KB per second (1 KB = 1000 bytes). Dividing
  // by the millisecond total directly avoids a zero divisor for runs shorter
  // than one second.
  long kiloBytesPerSecond = (totalTime > 0) ? (totalBytes / totalTime) : 0;
  System.out.println("\nFinal Stats:");
  System.out.println("Count:" + finishCount + " Success:" + successCount + " Failure:" + failureCount + " MS:"
      + Long.toString(totalTime) + " KBytes:" + Long.toString(totalBytes / 1000) + " KB/s:"
      + Long.toString(kiloBytesPerSecond));
}
}