package com.smash.revolance.ui.explorer; /* * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * Revolance-UI-Explorer * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * Copyright (C) 2012 - 2013 RevoLance * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as * published by the Free Software Foundation, either version 3 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this program. If not, see * <http://www.gnu.org/licenses/gpl-3.0.html>. * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ import com.smash.revolance.ui.model.bot.Bot; import com.smash.revolance.ui.model.element.api.Button; import com.smash.revolance.ui.model.element.api.Data; import com.smash.revolance.ui.model.element.api.Element; import com.smash.revolance.ui.model.element.api.Link; import com.smash.revolance.ui.model.helper.BotHelper; import com.smash.revolance.ui.model.helper.ImageHelper; import com.smash.revolance.ui.model.page.api.Page; import org.apache.log4j.Level; import org.apache.log4j.Logger; import org.openqa.selenium.StaleElementReferenceException; import org.openqa.selenium.WebElement; import java.util.ArrayList; import java.util.Collections; import java.util.List; /** * User: wsmash * Date: 02/06/13 * Time: 12:35 */ public class PageParser { private Page page; public PageParser(Page page) { this.page = page; } private List<Element> parseContent() throws Exception { page.getUser().getLogger().log(Level.INFO, "Parsing elements" ); long mark = System.currentTimeMillis(); List<Element> content = _parseContent(); logClickableContent( content ); long duration = ( System.currentTimeMillis() - mark ) / 1000; page.getUser().getLogger().log(Level.INFO, "Parsing elements [Done] [Duration: " + duration + " sec]" ); return content; } private void logClickableContent(List<Element> content) { // List<Element> clickableContent = Element.filterClickableElements( content ); if ( !content.isEmpty() ) { page.getUser().getLogger().log(Level.INFO, "Clickable content found: " ); for ( Element element : content ) { if(element instanceof Link || element instanceof Button) { page.getUser().getLogger().log(Level.INFO, "--| " + element.getContent() ); } } } else { page.getUser().getLogger().log(Level.INFO, "No clickable content has been found." ); } } private List<Element> _parseContent() throws Exception { List<Element> content = new ArrayList(); if ( !page.isExternal() && !page.isBroken() ) { // retrieve all the elements of the html body content = getElements(); // filter the elements included in each other (optimization) _filterElementsIncludedInEachOther(content); // takes screenshot of all the content takeScreenshots( content ); // only for convenience to be able to track the click sequence Collections.sort( content ); } page.setContent( content ); return content; } private void _filterElementsIncludedInEachOther(List<Element> content) throws Exception { long mark = System.currentTimeMillis(); page.getUser().getLogger().log(Level.INFO, "Filtering page elements" ); Element.filterElementsIncludedInEachOthers( content, page.getArea() * 0.95, 1 / 8 ); long duration = ( System.currentTimeMillis() - mark ) / 1000; page.getUser().getLogger().log(Level.INFO, "Filtering page elements [Done] [Duration: " + duration + "sec]"); page.getUser().getLogger().log(Level.INFO, "Found: " + content.size() + " pertinent elements" ); } public String takeScreenShot() throws Exception { Bot bot = page.getUser().getBot(); if ( page.getCaption().isEmpty() ) { // if(getTitle().isEmpty()) // { // setTitle( getBot().getCurrentTitle() ); // } if ( page.getUser().isPageScreenshotEnabled() ) { page.getUser().getLogger().log(Level.INFO, "Taking page snapshot: '" + page.getTitle() + "'" ); long mark = System.currentTimeMillis(); String img = BotHelper.takeScreenshot( bot ); if ( img != null ) { // update the image and the caption page.setImage( ImageHelper.decodeToImage( img ) ); page.setScreenshotTaken( true ); } long duration = ( System.currentTimeMillis() - mark ) / 1000; page.getUser().getLogger().log(Level.INFO, "Taking page snapshot: '" + page.getTitle() + "' [Done] [Duration: " + duration + " sec]" ); } } return page.getCaption(); } private void takeScreenshots(List<Element> content) throws Exception { if ( page.getUser().isPageScreenshotEnabled() && page.getUser().isPageElementScreenshotEnabled() ) { long mark = System.currentTimeMillis(); int contentIdx = 0; for ( Element pageElement : content ) { contentIdx++; page.getUser().getLogger().log(Level.INFO, String.format("Taking element screenshots ( %d / %d )", contentIdx, content.size() )); pageElement.takeScreenShot(); } long duration = ( System.currentTimeMillis() - mark ) / 1000; page.getUser().getLogger().log(Level.INFO, "Taking elements screenshots [Done] [Duration: " + duration + "sec]"); } } private int getHeight(Bot bot) throws Exception { //Object o = bot.runJS( "return Math.max(document.body.clientHeight, window.innerHeight)" ); Object o = bot.runJS( "var D = document; return Math.max(D.body.scrollHeight, D.documentElement.scrollHeight,D.body.offsetHeight, D.documentElement.offsetHeight,D.body.clientHeight, D.documentElement.clientHeight);" ); if ( o == null ) { return page.getUser().getBrowserHeight(); } return Integer.parseInt( String.valueOf( (Long) o ) ); } private int getWidth(Bot bot) throws Exception { //Object o = bot.runJS( "return Math.max(document.body.clientWidth, window.innerWidth)" ); Object o = bot.runJS( "var D = document; return Math.max(D.body.scrollWidth, D.documentElement.scrollWidth,D.body.offsetWidth, D.documentElement.offsetWidth,D.body.clientWidth, D.documentElement.clientWidth);" ); if ( o == null ) { return page.getUser().getBrowserWidth(); } return Integer.parseInt( String.valueOf( (Long) o ) ); } public void parse() throws Exception { Logger logger = page.getUser().getLogger(); if ( !page.hasBeenParsed() && !page.isExternal() ) { if ( page.getUser().getCurrentPage() != page ) { page.getUser().goTo( page ).awaitLoaded(); } page.setWidth( getWidth( page.getUser().getBot() ) ); page.setHeight( getHeight( page.getUser().getBot() ) ); if ( page.getApplication().isPageBroken( page ) ) { page.setBroken( true ); } if ( !page.getApplication().isAuthorized( page ) ) { page.setAuthorized( false ); } if ( page.getUser().isPageScreenshotEnabled() && page.getCaption().isEmpty() ) { takeScreenShot(); } if ( !page.isBroken() && page.isEmpty() ) { parseContent(); } else { logger.log(Level.WARN, "Page with url: '" + page.getUrl() + "' is broken."); } page.setParsed( true ); } else if ( page.isExternal() ) { logger.log(Level.WARN, "Page with url: '" + page.getUrl() + "' is out of the domain: '" + page.getUser().getDomain() + "'."); } } public List<Element> getElements() throws Exception { final Logger logger = page.getUser().getLogger(); long mark = System.currentTimeMillis(); List<Element> elements = new ArrayList<Element>(); List<WebElement> webElements = BotHelper.getRawElements( page.getUser().getBot(), page ); int idx = 0; int elementCount = webElements.size(); for ( WebElement element : webElements ) { idx++; try { if ( element.isDisplayed() ) { Class<? extends Element> elemImpl = Element.getImplementation( element ); if ( elemImpl != null ) { Element elem = elemImpl.getConstructor( Page.class, WebElement.class ).newInstance( page, element ); if ( elem.getArea() > 0 ) { // handleAddition( elements, elem ); elements.add( elem ); } } } } catch (StaleElementReferenceException e) { logger.log(Level.ERROR, e); } finally { logger.log(Level.INFO, "Retrieving page element ( " + idx + "/" + elementCount + " )" ); } } long duration = ( System.currentTimeMillis() - mark ) / 1000; logger.log(Level.INFO, "Retrieving page elements [Done] [Duration: " + duration + "sec]" ); return elements; } public static void handleAddition(List<Element> elements, Element elem) { boolean isToAdd = true; List<Element> toBeRemoved = new ArrayList<Element>(); if ( elem instanceof Data && elem.getContent().isEmpty() ) { return; } else if ( elem instanceof Button || elem instanceof Link || elem instanceof Data ) { for ( Element element : elements ) { if ( element.isIncluded( elem ) || elem.isIncluded( element ) ) { if ( elem instanceof Data && element instanceof Data ) { if ( elem.getArea() < element.getArea() ) { toBeRemoved.add( element ); } else { isToAdd = false; } } else if ( elem instanceof Data && ( element instanceof Link || element instanceof Button ) ) { isToAdd = false; } else if ( elem instanceof Button || elem instanceof Link ) { toBeRemoved.add( element ); } // Optimisation since we're calling this at each addition & we add one element at a time // there is no need to run the complete loop break; } } } elements.removeAll( toBeRemoved ); if ( isToAdd ) { elements.add( elem ); } } }