package com.abmash.parser; import com.abmash.api.Browser; import com.abmash.api.HtmlElement; import com.abmash.api.HtmlElements; import com.abmash.api.query.QueryFactory; import com.abmash.parser.content.Header; import com.abmash.parser.content.Image; import com.abmash.parser.content.Input; import com.abmash.parser.content.Link; public class HtmlParser extends DocumentParser { private Browser browser; // TODO parse methods as class structure public HtmlParser(Browser browser) { super(browser.getWebDriver().getPageSource()); this.browser = browser; } @Override protected void parseUrl() { url = browser.getCurrentUrl(); } // TODO structured: texts, tables, lists, sentences @Override protected void parseVisibleText() { // TODO visible text without html tags visibleText = browser.query(QueryFactory.select("body")).findFirst().getText(); // browser.log().debug(content.getVisibleText()); } @Override protected void parseTitle() { title = browser.getWebDriver().getTitle(); // browser.log().debug(" TITLE: " + content.getTitle()); } @Override protected void parseMetaTags() { // TODO parse meta-tags // HtmlElements metaElements = browser.query().cssSelector("meta").find(); // for (HtmlElement element: metaElements) { // String key = element.getAttribute("name"); // if(key == null) key = element.getAttribute("http-equiv"); // if(key != null) { //// browser.log().debug(" META: " + webElement.getAttribute(key) + " - " + webElement.getAttribute("content")); // metaTags.put(element.getAttribute(key), element.getAttribute("content")); // } // } } @Override protected void parseHeaders() { // parse headers HtmlElements headerElements = browser.query(QueryFactory.headline()).find(); for (HtmlElement element: headerElements) { Header header = new Header(); header.setText(element.getText()); header.setSize(element.getCssValue("font-size")); // browser.log().debug(" HEADER: " + header); headers.add(header); } } @Override protected void parseLinks() { // parse links HtmlElements linkElements = browser.query(QueryFactory.select("a")).find(); for (HtmlElement element: linkElements) { Link link = new Link(); link.setUrl(element.getAttribute("href")); link.setName(element.getText()); // browser.log().debug(" LINK: " + link); links.add(link); } } @Override protected void parseImages() { // parse images HtmlElements imageElements = browser.query(QueryFactory.image()).find(); for (HtmlElement element: imageElements) { Image image = new Image(); image.setUrl(element.getAttribute("src")); image.setTitle(element.getAttribute("title")); image.setAlt(element.getAttribute("alt")); // browser.log().debug(" IMAGE: " + image); images.add(image); } } @Override protected void parseInputs() { // parse forms HtmlElements inputElements = browser.query(QueryFactory.typable()).find(); for (HtmlElement element: inputElements) { Input input = new Input(); input.setLabel(""); //TODO find label input.setType(element.getAttribute("type")); input.setName(element.getAttribute("name")); // browser.log().debug(" INPUT: " + input); inputs.add(input); } } @Override public void parsePeriodicElements() { } }