package org.Webgatherer.ExperimentalLabs.Scraper.Deprecated;

import com.google.gson.Gson;
import org.Webgatherer.Controller.EntityTransport.EntryTransport;
import org.Webgatherer.CoreEngine.lib.WebDriverFactory;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;

import java.util.ArrayList;
import java.util.List;

/**
 * Command-line scraper that walks a range of CrunchBase map-search result
 * pages with a Selenium {@link WebDriver}, collects the anchors whose
 * {@code onclick} handler starts with {@code snap_to_marker}, resolves each
 * one through {@code pullCompanyUrl} and posts the result to a web service.
 *
 * <p>{@code sleep}, {@code convertToUrl}, {@code pullCompanyUrl},
 * {@code webServiceClient} and {@code webServiceContentType} are not declared
 * in this file; presumably they are inherited from {@link ScraperBaseStatic}.
 *
 * @author Rick Dane
 */
public class Entry_Scraper2 extends ScraperBaseStatic {

    /** Result page currently being processed; advanced by {@link #main}. */
    private static int pageNum = 4;

    /** Last result page (inclusive) to process. */
    private static int maxPages = 7;

    /** Output file path; not referenced by the active code in this class. */
    private static String fileOutput = "/home/user/Dropbox/Rick/WebGatherer/Output/crunchbase";

    /**
     * Scrapes result pages {@code pageNum} through {@code maxPages} and then
     * shuts the browser session down.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        WebDriverFactory webDriverFactory = new WebDriverFactory();
        WebDriver driver = webDriverFactory.createNewWebDriver();
        try {
            while (pageNum <= maxPages) {
                sleep();
                getLinkFromOnclickElement(driver,
                        "http://www.crunchbase.com/maps/search?range=140&geo=san+francisco%2C+ca&page=",
                        "http://www.crunchbase.com/company/",
                        "crunchbase");
                pageNum++;
            }
        } finally {
            // Always end the browser session, even when a page fails mid-run.
            driver.quit();
        }
    }

    /**
     * Loads the search page for the current {@code pageNum}, clicks every
     * anchor whose {@code onclick} attribute starts with
     * {@code snap_to_marker}, and then resolves and persists each collected
     * entry.
     *
     * @param driver    browser session used for all navigation
     * @param searchUrl search URL prefix; the current page number is appended
     * @param baseUrl   prefix handed to {@code convertToUrl} together with the
     *                  anchor's title
     * @param key       passed through unchanged to {@code pullCompanyUrl}
     */
    private static void getLinkFromOnclickElement(WebDriver driver, String searchUrl, String baseUrl, String key) {
        String url = searchUrl + pageNum;
        driver.get(url);

        // Collect every candidate first; the entries are only resolved once
        // the scan of this page's anchors is complete.
        List<String[]> initialUrls = new ArrayList<String[]>();
        for (WebElement link : driver.findElements(By.tagName("a"))) {
            try {
                String onclick = link.getAttribute("onclick");
                // Most anchors carry no onclick handler at all; skip them
                // explicitly instead of relying on a swallowed exception.
                if (onclick == null || !onclick.startsWith("snap_to_marker")) {
                    continue;
                }

                String title = link.getAttribute("title");
                sleep();
                link.click();

                // Without a title there is nothing to build a company URL from.
                if (title == null) {
                    continue;
                }
                String[] entry = {convertToUrl(baseUrl, title).toLowerCase(), title.toLowerCase()};
                initialUrls.add(entry);
            } catch (Exception e) {
                // Best effort: one bad link must not abort the whole page,
                // but the failure should at least be visible.
                System.err.println("Skipping link on " + url + ": " + e);
            }
        }

        for (String[] curEntry : initialUrls) {
            String pulledUrl = pullCompanyUrl(driver, curEntry, key);
            if (pulledUrl != null) {
                System.out.println(pulledUrl);
                persistEntry(pulledUrl);
            }
        }
    }

    /**
     * Wraps {@code data} in an {@link EntryTransport}, serialises it to JSON
     * and posts it through {@code webServiceClient}.
     *
     * <p>Very crude: this only exercises the service call and really belongs
     * in a different class.
     *
     * @param data value stored as the transport's description
     */
    private static void persistEntry(String data) {
        EntryTransport entryTransport = new EntryTransport();
        entryTransport.setDescription(data);

        // Serialise the transport object itself rather than the raw string,
        // which would leave the populated transport unused.
        // NOTE(review): assumes the service expects an EntryTransport payload - confirm.
        String jsonData = new Gson().toJson(entryTransport);
        webServiceClient.servicePost("", jsonData, webServiceContentType);
    }
}