package student.web.internal.tests;
import student.web.*;
import java.net.*;
import java.util.*;
import student.web.TurboWebBot;
// -------------------------------------------------------------------------
/**
* This subclass of WebBot will collect data from a specified page.
*
* @author dboynton
* @version 2009.02.11
*/
public class SourceForgeScraper
extends TurboWebBot
{
//~ Instance/static variables .............................................
//~ Constructor ...........................................................
/**
* Constructor with no page specified. This will point to a default.
*/
public SourceForgeScraper() {
super("http://sourceforge.net/softwaremap/");
}
//~ Methods ...............................................................
/**
* This method locates all html code within a specified div tag.
*
* @param divName The name of the html div tag that we are interested in.
* @return All html code between within the specified Div. Null on error.
*/
public String getDivContents(String divName) {
String pageHTML = getPageContent();
String searchParam = "<div id=\"" + divName + "\">";
int startPosition = 0;
int endPosition = 0;
// Find the division in question. If found, set the starting marker
// at the end of the tag, and the ending marker at the first div close.
startPosition = pageHTML.indexOf(searchParam, 0);
if (startPosition != -1) {
startPosition += searchParam.length();
endPosition = pageHTML.indexOf("</div>", startPosition);
}
// If the requested information was found, return the string.
// Otherwise, return null.
if (startPosition != -1 && endPosition != -1) {
return pageHTML.substring(startPosition, endPosition);
}
else {
return null;
}
}
/**
* This method will return all aplications located within a specified
* block of HTML. The block will be identified by its Division id.
*
* @param type A String identifying the type of application to look for.
* @returns An ArrayList containing all of the applications that were found.
*/
public ArrayList<String> getApplicationNames(String type) {
String divContents = "";
ArrayList<String> recoveredApps = new ArrayList<String>();
returnToStartOfPage();
divContents = getDivContents(type);
jumpToThisHTML(divContents);
advanceToNextLink();
while (!isLookingAtEndOfPage()) {
if (getLinkURI().toString().contains("projects")) {
recoveredApps.add(getCurrentElementText());
}
advanceToNextLink();
}
returnToPreviousPage();
return recoveredApps;
}
/**
* Prints the most active projects on SourceForge.
*/
public void printMostActive() {
for (String application : getApplicationNames("most_active")) {
out().println(application);
}
}
/**
* Prints the projects that have the most downloads.
*/
public void printMostPopular() {
for (String application : getApplicationNames("most_downloaded")) {
out().println(application);
}
}
}