/* * Tanaguru - Automated webpage assessment * Copyright (C) 2008-2015 Tanaguru.org * * This file is part of Tanaguru. * * Tanaguru is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as * published by the Free Software Foundation, either version 3 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * * Contact us by mail: tanaguru AT tanaguru DOT org */ package org.tanaguru.ruleimplementation; import java.util.*; import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang3.StringUtils; import org.jsoup.nodes.Element; import org.tanaguru.entity.audit.*; import org.tanaguru.entity.subject.Site; import org.tanaguru.entity.subject.WebResource; import org.tanaguru.processor.SSPHandler; import org.tanaguru.rules.elementselector.ElementSelector; import org.tanaguru.rules.elementselector.SimpleElementSelector; import org.tanaguru.rules.keystore.AttributeStore; import org.tanaguru.rules.keystore.CssLikeQueryStore; import org.tanaguru.rules.textbuilder.TextElementBuilder; import org.tanaguru.service.ProcessRemarkService; /** * This abstract class checks the unicity of each element selected from a css-like * expression at the site level. If two elements are identical and one * doesn't point to the other thanks to the "rel=canonical" mechanism, a * sourceCodeRemark is created, and the final result is false, true instead. * * For a given page, the first occurence of the selection is used to extract the * data to test, the other occurences are ignored. * * @author jkowalczyk */ public class AbstractUniqueElementSiteRuleImplementation extends AbstractSiteRuleWithPageResultImplementation { private static final String REL_CAN_VALUE_REMARK_MSG = "relCanonicalValue"; /* the page level message code*/ private final String pageLevelMessageCode; /* the site level message code*/ private final String siteLevelMessageCode; /* the elementSelector*/ private final ElementSelector elementSelector; /* the textElementBuilder*/ private final TextElementBuilder textElementBuilder; /* canonical testing*/ private final boolean canonicalTesting; /* the processRemarkService*/ private ProcessRemarkService prs; /** * * @param elementSelector * @param textElementBuilder * @param pageLevelMessageCode * @param siteLevelMessageCode * @param canonicalTesting */ public AbstractUniqueElementSiteRuleImplementation( ElementSelector elementSelector, TextElementBuilder textElementBuilder, String pageLevelMessageCode, String siteLevelMessageCode, boolean canonicalTesting) { super(); this.elementSelector = elementSelector; this.textElementBuilder = textElementBuilder; this.pageLevelMessageCode = pageLevelMessageCode; this.siteLevelMessageCode = siteLevelMessageCode; this.canonicalTesting = canonicalTesting; } @Override protected ProcessResult processImpl(SSPHandler sspHandler) { prs = sspHandler.getProcessRemarkService(); prs.resetService(); ElementHandler<Element> eh = new ElementHandlerImpl(); elementSelector.selectElements(sspHandler, eh); String text = null; if (!eh.isEmpty()) { Element element = eh.get().iterator().next(); text = textElementBuilder.buildTextFromElement(element); } Collection<ProcessRemark> remarks = null; if (canonicalTesting) { remarks = new ArrayList<>(); extractRelCanonical( sspHandler, prs, remarks); } IndefiniteResult result = processResultDataService.getIndefiniteResult( test, sspHandler.getPage(), text, remarks); // may be null return result; } @Override protected List<DefiniteResult> consolidateSiteImpl( Site group, List<ProcessResult> groupedGrossResultList, ProcessRemarkService processRemarkService) { prs = processRemarkService; processRemarkService.resetService(); /* set solution as NOT_APPLICABLE */ TestSolution testSolution = TestSolution.NOT_APPLICABLE; List<DefiniteResult> netResultList = new ArrayList<>(); int elementCounter = 0; if (!groupedGrossResultList.isEmpty()) { // if some grossResult have been collected during process phasis, // we have elements to compare and the test result is set to // passed. testSolution = TestSolution.PASSED; Map<String, List<ProcessResult>> previousText = new HashMap<>(); // we parse all the result to populate a map where the key is an // encountered textual element and the value is a collection of pages // where that textual element has been found. If for a given page, // the textual element could not have been extracted (the element // is absent), a result is created with the NOT_APPLICABLE result for (ProcessResult grossResult : groupedGrossResultList) { if (grossResult.getValue() != null) { elementCounter++; String text = (String) grossResult.getValue(); if (previousText.containsKey(text)) { previousText.get(text).add(grossResult); } else { List<ProcessResult> urlList = new ArrayList<>(); urlList.add(grossResult); previousText.put(text, urlList); } } else { netResultList.add( processResultDataService.getDefiniteResult( test, grossResult.getSubject(), TestSolution.NOT_APPLICABLE, null)); } } // if all the elements are null if (previousText.isEmpty()) { testSolution = TestSolution.NOT_APPLICABLE; } else { Iterator<Map.Entry<String, List<ProcessResult>>> iter = previousText.entrySet().iterator(); List<ProcessResult> tmpElementList; while (iter.hasNext()) { // if the same element has been found twice Map.Entry<String, List<ProcessResult>> entry = iter.next(); tmpElementList = entry.getValue(); String tmpTagValue = entry.getKey(); if (tmpElementList.size() > 1 ) { TestSolution ts = computeResultAndCreateRemarks( tmpElementList, netResultList, tmpTagValue, elementCounter); if (ts.equals(TestSolution.FAILED)) { testSolution = TestSolution.FAILED; } } else { // at page level, the result is passed netResultList.add( createResultAtPageLevel( tmpElementList.iterator().next().getSubject(), TestSolution.PASSED, 0, null)); } } } } netResultList.add( processResultDataService.getDefiniteResult( test, group, testSolution, elementCounter, processRemarkService.getRemarkList())); return netResultList; } /** * This methods creates failed remarks at page scope and site scope when * duplicated are found. * @param netResultList * @param urlOnError */ private TestSolution computeResultAndCreateRemarks( List<ProcessResult> processResultList, List<DefiniteResult> netResultList, String elementValue, int elementCounter) { Collection<WebResource> wrsOnError = createUrlListFromProcessResultList( processResultList, netResultList); TestSolution testSolution = TestSolution.PASSED; if (CollectionUtils.isNotEmpty(wrsOnError)) { for (WebResource wr : wrsOnError) { testSolution = TestSolution.FAILED; prs.addConsolidationRemark( TestSolution.FAILED, siteLevelMessageCode, elementValue, wr.getURL()); Collection<ProcessRemark> processRemarkList = createProcessRemarkListForPageOnError( elementValue, wrsOnError); netResultList.add( createResultAtPageLevel( wr, TestSolution.FAILED, elementCounter, processRemarkList)); } // TO DO : set Passed the pages that have a correct rel=canonical // definition } else { netResultList.addAll( createResultAtPageLevel( wrsOnError, TestSolution.PASSED, 0, null)); } return testSolution; } /** * * @param processResultList * @return */ private Collection<DefiniteResult> createResultAtPageLevel( Collection<WebResource> webResourceList, TestSolution testSolution, int elementCounter, Collection<ProcessRemark> processRemarkList) { Collection<DefiniteResult> definiteResults = new ArrayList<>(); for (WebResource wr : webResourceList) { definiteResults.add( createResultAtPageLevel( wr, testSolution, elementCounter, processRemarkList)); } return definiteResults; } /** * * @param processResultList * @return */ private DefiniteResult createResultAtPageLevel( WebResource wr, TestSolution testSolution, int elementCounter, Collection<ProcessRemark> processRemarkList) { DefiniteResult result = processResultDataService.getDefiniteResult( test, wr, testSolution, processRemarkList); if (elementCounter > 0) { result.setElementCounter(elementCounter); } return result; } /** * For the element with a correct rel=canonical definition, a passed result * is thrown on the fly and added to the netResultList * * @param processResultList * @param netResultList * @return the webResource that have a duplicate element */ private Collection<WebResource> createUrlListFromProcessResultList( Collection<ProcessResult> processResultList, List<DefiniteResult> netResultList) { Collection<WebResource> pagesWithDuplicate = new HashSet<>(); Map<String, Collection<WebResource>> urlListWithRelCanonical = new HashMap<>(); // extraction for (ProcessResult processResult : processResultList) { String canonicalValue = getCanonicalValue(processResult); WebResource wr = processResult.getSubject(); if (StringUtils.isNotBlank(canonicalValue)) { if (urlListWithRelCanonical.containsKey(canonicalValue)) { urlListWithRelCanonical.get(canonicalValue).add(wr); } else { Collection<WebResource> wrs = new ArrayList<>(); wrs.add(wr); urlListWithRelCanonical.put(canonicalValue, wrs); } } else { pagesWithDuplicate.add(wr); } } // process if (pagesWithDuplicate.size() == 1 ) { String canonicalUrl = pagesWithDuplicate.iterator().next().getURL(); if (urlListWithRelCanonical.size() == 1) { String canonicalValue = urlListWithRelCanonical.keySet().iterator().next(); if (StringUtils.equalsIgnoreCase(canonicalUrl, canonicalValue)) { // if all the pages with the rel canonical point to // a unique page defined by the href value, a new empty list // is returned and the test is passed netResultList.addAll( createResultAtPageLevel( urlListWithRelCanonical.get(canonicalValue), TestSolution.PASSED, 0, null)); netResultList.add( createResultAtPageLevel( pagesWithDuplicate.iterator().next(), TestSolution.PASSED, 0, null)); return Collections.<WebResource>emptyList(); } else { // if all the pages with the rel canonical don't point to // a unique page defined by the href value, all is on error pagesWithDuplicate.addAll(urlListWithRelCanonical.get(canonicalValue)); } } else { for (String entry : urlListWithRelCanonical.keySet()) { // the pages with a rel canonical that don't point to // a unique page defined by the href value, are set on error if (!StringUtils.equalsIgnoreCase(entry, canonicalUrl)) { pagesWithDuplicate.addAll(urlListWithRelCanonical.get(entry)); } else { netResultList.addAll( createResultAtPageLevel( urlListWithRelCanonical.get(entry), TestSolution.PASSED, 0, null)); } } } } else { // if a rel canonical value hasn't been encountered, the related // pages are set as duplication Collection<String> urlsWithDuplicate = getUrlsFromWebResources(pagesWithDuplicate); for (String entry : urlListWithRelCanonical.keySet()) { if (!urlsWithDuplicate.contains(entry)) { pagesWithDuplicate.addAll(urlListWithRelCanonical.get(entry)); } else { netResultList.addAll( createResultAtPageLevel( urlListWithRelCanonical.get(entry), TestSolution.PASSED, 0, null)); } } } return pagesWithDuplicate; } /** * * @param processResult * @return the extracted canonical value when it exists, null instead. */ private String getCanonicalValue(ProcessResult processResult) { Collection<ProcessRemark> processRemarks = processRemarkDataService.findProcessRemarksFromProcessResult(processResult, -1); /*-1 means no limit*/ if (CollectionUtils.isEmpty(processRemarks) || processRemarks.size() > 1){ return null; } ProcessRemark prk = processRemarks.iterator().next(); if (prk.getIssue().equals(TestSolution.PASSED) && prk.getMessageCode().equals(REL_CAN_VALUE_REMARK_MSG) ) { for (EvidenceElement ee : prk.getElementList()) { if (ee.getEvidence().getCode().equals(ProcessRemarkService.DEFAULT_EVIDENCE)) { return ee.getValue(); } } } return null; } /** * * @param webResources * @return the collection of urls regarding the webresources */ private Collection<String> getUrlsFromWebResources(Collection<WebResource> webResources) { Collection<String> urls = new ArrayList<>(); for (WebResource wr : webResources) { urls.add(wr.getURL()); } return urls; } /** * * @param tagValue * @param urlList * @param processRemarkService * @return */ private Collection<ProcessRemark> createProcessRemarkListForPageOnError( String tagValue, Collection<WebResource> wrList) { Collection<ProcessRemark> processRemarkList = new ArrayList<>(); for (WebResource wr : wrList) { processRemarkList.add( prs.createConsolidationRemark( TestSolution.FAILED, pageLevelMessageCode, tagValue, wr.getURL())); } return processRemarkList; } /** * * @param sspHandler * @param prs * @param remarks */ private void extractRelCanonical( SSPHandler sspHandler, ProcessRemarkService prs, Collection<ProcessRemark> remarks) { ElementSelector relCanonicalSelector = new SimpleElementSelector(CssLikeQueryStore.REL_CANONICAL_CSS_LIKE_QUERY); ElementHandler<Element> relCan = new ElementHandlerImpl(); relCanonicalSelector.selectElements(sspHandler, relCan); if (relCan.get().size() != 1) { return; } String relValue = ((Element)relCan.get().iterator().next()).absUrl(AttributeStore.HREF_ATTR); if (!StringUtils.equalsIgnoreCase(relValue, sspHandler.getSSP().getURI())) { remarks.add( prs.createConsolidationRemark( TestSolution.PASSED, REL_CAN_VALUE_REMARK_MSG, relValue, sspHandler.getSSP().getURI())); } } }