/*
 *  WPCleaner: A tool to help on Wikipedia maintenance tasks.
 *  Copyright (C) 2013  Nicolas Vervelle
 *
 *  See README.txt file for licensing information.
 */

package org.wikipediacleaner.api.check.algorithm;

import java.util.Collection;
import java.util.List;

import org.wikipediacleaner.api.check.CheckErrorResult;
import org.wikipediacleaner.api.data.PageAnalysis;
import org.wikipediacleaner.api.data.PageElementTag;


/**
 * Algorithm for analyzing error 100 of check wikipedia project.
 * Error 100: List tag ({@code <ol>}, {@code <ul>} or {@code <li>}) with no correct match.
 */
public class CheckErrorAlgorithm100 extends CheckErrorAlgorithmBase {

  public CheckErrorAlgorithm100() {
    super("List tag (<ol>, <ul> or <li>) with no correct match.");
  }

  /**
   * Look for list tags that are not complete and report each of them.
   *
   * An incomplete {@code <li>} located inside an {@code <ol>} or {@code <ul>}
   * is not reported. For a reported {@code <li>} that is preceded only by
   * spaces on its line, the reported area is extended up to the end of that
   * line, and an automatic replacement appending the closing tag is proposed
   * when the following line starts with a list tag.
   *
   * @param analysis Page analysis.
   * @param errors Errors found in the page; when null, the method returns as
   *               soon as a first error is detected.
   * @param onlyAutomatic True if analysis could be restricted to errors
   *                      automatically fixed (not consulted by this implementation).
   * @return Flag indicating if the error was found.
   */
  @Override
  public boolean analyze(
      PageAnalysis analysis,
      Collection<CheckErrorResult> errors, boolean onlyAutomatic) {
    if (analysis == null) {
      return false;
    }

    // Nothing to do without tags
    List<PageElementTag> tags = analysis.getTags();
    if ((tags == null) || tags.isEmpty()) {
      return false;
    }

    boolean found = false;
    for (PageElementTag tag : tags) {

      // Only incomplete list tags are of interest
      if (!isListTag(tag) || tag.isComplete()) {
        continue;
      }

      // An unclosed <li> inside a list is accepted
      // @see https://html.spec.whatwg.org/multipage/semantics.html#the-li-element
      boolean listItem = PageElementTag.TAG_HTML_LI.equals(tag.getNormalizedName());
      if (listItem && isInsideList(analysis, tag.getBeginIndex())) {
        continue;
      }

      // Caller only wants to know if the error exists
      if (errors == null) {
        return true;
      }
      found = true;

      int start = tag.getBeginIndex();
      int stop = tag.getEndIndex();
      String fix = null;

      // Try to suggest a closing tag for an item starting its line
      if (listItem) {
        String text = analysis.getContents();
        if (startsLine(text, start)) {
          for (int pos = stop; pos < text.length(); pos++) {
            char current = text.charAt(pos);
            if (current == '\n') {
              // The reported area always extends up to the end of the line
              stop = pos;
              if (isListTagStartingAt(analysis, pos + 1)) {
                fix =
                    text.substring(start, stop) +
                    PageElementTag.createTag(tag.getName(), true, false);
              }
              break;
            }
            if ((current == '<') && isListTagStartingAt(analysis, pos)) {
              // Another list tag on the same line: no suggestion, area unchanged
              break;
            }
          }
        }
      }

      // Register the error, a suggestion is always flagged as automatic
      CheckErrorResult errorResult = createCheckErrorResult(analysis, start, stop);
      if (fix != null) {
        errorResult.addReplacement(fix, true);
      }
      errors.add(errorResult);
    }

    return found;
  }

  /**
   * Test if a position is surrounded by an ordered or unordered list tag.
   *
   * @param analysis Page analysis.
   * @param index Position in the page.
   * @return True if an {@code <ol>} or {@code <ul>} tag surrounds the position.
   */
  private static boolean isInsideList(PageAnalysis analysis, int index) {
    if (analysis.getSurroundingTag(PageElementTag.TAG_HTML_OL, index) != null) {
      return true;
    }
    return analysis.getSurroundingTag(PageElementTag.TAG_HTML_UL, index) != null;
  }

  /**
   * Test if only space characters separate a position from the beginning of its line.
   *
   * @param text Page contents.
   * @param index Position in the contents.
   * @return True if going backward over spaces reaches the beginning of the
   *         contents or a line feed.
   */
  private static boolean startsLine(String text, int index) {
    int pos = index;
    while ((pos > 0) && (text.charAt(pos - 1) == ' ')) {
      pos--;
    }
    return (pos == 0) || (text.charAt(pos - 1) == '\n');
  }

  /**
   * Test if a list tag begins exactly at a given position.
   *
   * @param analysis Page analysis.
   * @param index Position in the page.
   * @return True if a tag is found at the position, begins there, and is a list tag.
   */
  private static boolean isListTagStartingAt(PageAnalysis analysis, int index) {
    PageElementTag candidate = analysis.isInTag(index);
    return (candidate != null) &&
           (candidate.getBeginIndex() == index) &&
           isListTag(candidate);
  }

  /**
   * Test if a tag is a list tag.
   *
   * @param tag Tag (may be null).
   * @return True if the normalized name of the tag is the one of a
   *         {@code <li>}, {@code <ol>} or {@code <ul>} tag, ignoring case.
   */
  private static boolean isListTag(PageElementTag tag) {
    if (tag == null) {
      return false;
    }
    String name = tag.getNormalizedName();
    return PageElementTag.TAG_HTML_LI.equalsIgnoreCase(name) ||
           PageElementTag.TAG_HTML_OL.equalsIgnoreCase(name) ||
           PageElementTag.TAG_HTML_UL.equalsIgnoreCase(name);
  }

  /**
   * Automatic fixing of some errors in the page.
   *
   * Delegates to the fix based on the automatic replacements.
   *
   * @param analysis Page analysis.
   * @return Page contents after fix.
   */
  @Override
  protected String internalAutomaticFix(PageAnalysis analysis) {
    return fixUsingAutomaticReplacement(analysis);
  }
}