package org.Webgatherer.Controller;
import org.Webgatherer.CoreEngine.lib.WebDriverFactory;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
/**
 * Manual test entry point that drives a browser through a CrunchBase map
 * search page, clicks the map-marker links it finds, and prints the first
 * matching external link found on each derived company page.
 *
 * @author Rick Dane
 */
public class Entry_ExampleMain {

    /** Pause, in milliseconds, before clicking each marker link. */
    private static final long CLICK_DELAY_MILLIS = 500L;

    /** Number of leading title characters that a candidate link must contain. */
    private static final int TITLE_PREFIX_LENGTH = 6;

    /**
     * FOR TESTING ONLY. Runs {@link #testDriver()}.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        testDriver();
    }

    /**
     * Cuts a URL down to everything before its first path separator.
     * A leading {@code scheme://} is skipped, so
     * {@code "http://example.com/a/b"} yields {@code "http://example.com"}
     * and {@code "example.com/a/b"} yields {@code "example.com"}.
     *
     * @param url the URL to shorten; may be {@code null}
     * @return the URL without its path, the input unchanged when it has no
     *         path separator, or {@code null} when the input is {@code null}
     */
    private static String prepareBaseDomainUrl(String url) {
        if (url == null) {
            return null;
        }
        int searchFrom = 0;
        int schemeEnd = url.indexOf("://");
        // Only treat "://" as a scheme separator when no earlier '/' exists.
        if (schemeEnd != -1 && url.indexOf('/') == schemeEnd + 1) {
            searchFrom = schemeEnd + 3;
        }
        int index = url.indexOf('/', searchFrom);
        if (index == -1) {
            return url;
        }
        return url.substring(0, index);
    }

    /**
     * Opens the map search page, gathers the marker entries, visits each
     * derived company page and prints every link found as an HTML anchor.
     * The driver is always shut down, even when a step fails.
     */
    private static void testDriver() {
        String url = "http://www.crunchbase.com/maps/search?range=140&geo=san+francisco%2C+ca";
        WebDriverFactory webDriverFactory = new WebDriverFactory();
        WebDriver driver = webDriverFactory.createNewWebDriver();
        try {
            driver.get(url);
            // NOTE(review): the page is deliberately requested a second time to
            // match the original behaviour; confirm whether this is required.
            driver.get(url);

            List<String[]> initialUrls = collectMarkerEntries(driver);
            for (String[] curEntry : initialUrls) {
                String pulledUrl = pullCompanyUrl(driver, curEntry);
                if (pulledUrl != null) {
                    System.out.println("<a href='" + pulledUrl + "'>" + pulledUrl + "</a>");
                }
            }
        } finally {
            // quit() ends the whole session so no browser is left running.
            driver.quit();
        }
    }

    /**
     * Scans every anchor on the current page, clicks those whose
     * {@code onclick} attribute starts with {@code snap_to_marker}, and
     * records a {company page URL, title} pair for each one that has a title.
     * Collection is best-effort: a link that fails is reported on standard
     * error and skipped.
     *
     * @param driver the driver positioned on the map search page
     * @return the collected pairs, in page order; never {@code null}
     */
    private static List<String[]> collectMarkerEntries(WebDriver driver) {
        List<String[]> entries = new ArrayList<String[]>();
        List<WebElement> links = driver.findElements(By.tagName("a"));
        for (WebElement link : links) {
            try {
                String onclick = link.getAttribute("onclick");
                if (onclick == null || !onclick.startsWith("snap_to_marker")) {
                    continue;
                }
                String title = link.getAttribute("title");
                Thread.sleep(CLICK_DELAY_MILLIS);
                link.click();
                // An entry without a title cannot be turned into a URL.
                if (title != null) {
                    entries.add(new String[] {convertToUrl(title), title});
                }
            } catch (InterruptedException e) {
                // Preserve the interrupt for the caller and stop collecting.
                Thread.currentThread().interrupt();
                break;
            } catch (RuntimeException e) {
                System.err.println("Skipping link: " + e);
            }
        }
        return entries;
    }

    /**
     * Loads a company page and returns the first link on it that does not
     * mention {@code crunchbase} and contains the leading characters of the
     * company title.
     *
     * @param driver  the driver used to load the page
     * @param origUrl a two-element array: the page URL at index 0 and the
     *                company title at index 1
     * @return the first matching {@code href}, or {@code null} when nothing
     *         matches or the input is incomplete
     */
    private static String pullCompanyUrl(WebDriver driver, String[] origUrl) {
        if (origUrl == null || origUrl.length < 2 || origUrl[0] == null || origUrl[1] == null) {
            return null;
        }
        String title = origUrl[1];
        // Titles shorter than the prefix length are used in full.
        String titlePrefix = title.substring(0, Math.min(TITLE_PREFIX_LENGTH, title.length()));
        if (titlePrefix.isEmpty()) {
            // An empty prefix is contained in every string and would match anything.
            return null;
        }

        driver.get(origUrl[0]);
        List<WebElement> links = driver.findElements(By.tagName("a"));
        for (WebElement curElement : links) {
            String matchUrl = curElement.getAttribute("href");
            if (matchUrl != null && !matchUrl.contains("crunchbase") && matchUrl.contains(titlePrefix)) {
                return matchUrl;
            }
        }
        return null;
    }

    /**
     * Builds a company page URL from a link title. Only the text before the
     * first {@code -} is used; it is trimmed, spaces become hyphens, and the
     * result is lower-cased independently of the default locale.
     *
     * @param inputStr the link title; must not be {@code null}
     * @return the company page URL for that title
     */
    private static String convertToUrl(String inputStr) {
        String[] split = inputStr.split("-");
        // split() yields an empty array when the input is made only of hyphens.
        String retString = split.length > 0 ? split[0] : inputStr;
        retString = retString.trim().replace(" ", "-").toLowerCase(Locale.ROOT);
        return "http://www.crunchbase.com/company/" + retString;
    }
}