package org.Webgatherer.ExperimentalLabs.Scraper.Deprecated;
import org.Webgatherer.CoreEngine.lib.WebDriverFactory;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.support.ui.Wait;
import java.util.ArrayList;
import java.util.List;
/**
 * Experimental scraper that loads Google Places search pages in a Selenium-driven browser
 * and prints selected links from each page to standard output.
 *
 * <p>For every search in {@link #main(String[])} a new {@link WebDriver} is obtained from
 * {@link WebDriverFactory}, the search URL is opened, and the {@code href} of each anchor
 * element that passes {@link #checkIfMatch(String)} is printed. The driver is always quit
 * once its search has finished.
 *
 * @author Rick Dane
 */
public class PlacesScraper {

    /** Text appended to every search term before it is placed in the URL. */
    private static final String QUERY_SUFFIX = ",+ca+software+company";

    /** Page number requested when the caller does not specify one. */
    private static final int DEFAULT_PAGE_NUMBER = 2;

    /** Step used to turn a 1-based page number into the {@code start} URL parameter. */
    private static final int RESULTS_PER_PAGE = 10;

    /** A link containing any of these substrings is rejected. */
    private static final String[] NEGATIVE_MATCHES = {"google", "youtube"};

    /** A link must contain every one of these substrings to be accepted. */
    private static final String[] POSITIVE_MATCHES = {"http"};

    /**
     * Runs the scraper for two fixed search terms, one browser session per term.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        WebDriverFactory webDriverFactory = new WebDriverFactory();
        scrape(webDriverFactory, "livermore");
        scrape(webDriverFactory, "san leandro");
    }

    /**
     * Creates a driver, runs one search with it, and quits the driver afterwards.
     *
     * @param webDriverFactory factory used to create the driver
     * @param searchStr        search term passed on to {@link #run(WebDriver, String)}
     */
    private static void scrape(WebDriverFactory webDriverFactory, String searchStr) {
        WebDriver driver = webDriverFactory.createNewWebDriver();
        try {
            run(driver, searchStr);
        } finally {
            // Quit even when the search throws, so no browser session is left behind.
            driver.quit();
        }
    }

    /**
     * Runs a search for {@code searchStr} on the default page number.
     *
     * @param driver    driver used to load the page; the caller remains responsible for quitting it
     * @param searchStr search term placed in front of the fixed query suffix
     */
    private static void run(WebDriver driver, String searchStr) {
        run(driver, searchStr, DEFAULT_PAGE_NUMBER);
    }

    /**
     * Opens the search URL for {@code searchStr} and prints every matching link on the page.
     *
     * @param driver     driver used to load the page; the caller remains responsible for quitting it
     * @param searchStr  search term placed in front of the fixed query suffix
     * @param pageNumber 1-based page number, converted to the {@code start} URL parameter
     */
    private static void run(WebDriver driver, String searchStr, int pageNumber) {
        // Multi-word terms such as "san leandro" would otherwise put a raw space in the URL;
        // use '+' like the rest of the query does.
        String searchString = (searchStr + QUERY_SUFFIX).replace(' ', '+');
        int start = (pageNumber - 1) * RESULTS_PER_PAGE;
        String url = "https://www.google.com/search?gcx=c&sourceid=chrome&ie=UTF-8&q=google+places#q="
                + searchString + "&hl=en&tbm=plcs&prmd=imvns&start=" + start;

        driver.get(url);

        // NOTE(review): the query travels in the URL fragment and no explicit wait is done
        // before reading the page - confirm the result links are present at this point.
        List<WebElement> links = driver.findElements(By.tagName("a"));
        for (WebElement link : links) {
            String linkStr = link.getAttribute("href");
            if (linkStr != null && checkIfMatch(linkStr)) {
                System.out.println(linkStr);
            }
        }
    }

    /**
     * Decides whether a link should be printed.
     *
     * @param linkStr the link's {@code href} value; must not be {@code null}
     * @return {@code true} when {@code linkStr} contains none of the negative terms and all of
     *         the positive terms; {@code false} otherwise
     */
    private static boolean checkIfMatch(String linkStr) {
        for (String curMatch : NEGATIVE_MATCHES) {
            if (linkStr.contains(curMatch)) {
                return false;
            }
        }
        for (String curMatch : POSITIVE_MATCHES) {
            if (!linkStr.contains(curMatch)) {
                return false;
            }
        }
        return true;
    }
}