/* * WPCleaner: A tool to help on Wikipedia maintenance tasks. * Copyright (C) 2013 Nicolas Vervelle * * See README.txt file for licensing information. */ package org.wikipediacleaner.api.check.algorithm; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; import org.wikipediacleaner.api.check.CheckErrorResult; import org.wikipediacleaner.api.check.CheckErrorResult.ErrorLevel; import org.wikipediacleaner.api.data.MagicWord; import org.wikipediacleaner.api.data.Namespace; import org.wikipediacleaner.api.data.PageAnalysis; import org.wikipediacleaner.api.data.PageElementImage; import org.wikipediacleaner.api.data.PageElementTag; import org.wikipediacleaner.i18n.GT; /** * Algorithm for analyzing error 523 of check wikipedia project. * Error 523: Duplicated image */ public class CheckErrorAlgorithm523 extends CheckErrorAlgorithmBase { public CheckErrorAlgorithm523() { super("Duplicated image"); } /** * Analyze a page to check if errors are present. * * @param analysis Page analysis. * @param errors Errors found in the page. * @param onlyAutomatic True if analysis could be restricted to errors automatically fixed. * @return Flag indicating if the error was found. */ @Override public boolean analyze( PageAnalysis analysis, Collection<CheckErrorResult> errors, boolean onlyAutomatic) { if ((analysis == null) || (analysis.getPage() == null)) { return false; } // Configuration int minSize = 64; String minSizeText = getSpecificProperty("min_size", true, true, false); if (minSizeText != null) { try { minSize = Integer.parseInt(minSizeText, 10); } catch (NumberFormatException e) { // Nothing to do } } // Memorize where each single image is Map<String, List<Element>> imagesMap = new HashMap<String, List<Element>>(); List<PageElementImage> images = analysis.getImages(); for (PageElementImage image : images) { boolean shouldAdd = true; // Only images that respect a minimum size if (shouldAdd) { PageElementImage.Parameter paramWidth = image.getParameter(MagicWord.IMG_WIDTH); if ((paramWidth != null) && (paramWidth.getContents() != null)) { String contents = paramWidth.getContents().replaceAll("\\D", ""); try { int size = Integer.parseInt(contents, 10); if (size < minSize) { shouldAdd = false; } } catch (NumberFormatException e) { // Nothing to do } } } // Ignore images with a page parameter if (shouldAdd) { PageElementImage.Parameter paramPage = image.getParameter(MagicWord.IMG_PAGE); if (paramPage != null) { shouldAdd = false; } } if (shouldAdd) { addImage(imagesMap, image.getImage(), image.getBeginIndex(), image.getEndIndex()); } } // Memorize where each image in a gallery is List<PageElementTag> galleryTags = analysis.getCompleteTags(PageElementTag.TAG_WIKI_GALLERY); String contents = analysis.getContents(); Namespace imageNamespace = analysis.getWikiConfiguration().getNamespace(Namespace.IMAGE); for (PageElementTag galleryTag : galleryTags) { if (galleryTag.getMatchingTag() != null) { PageElementTag endTag = galleryTag.getMatchingTag(); int beginIndex = galleryTag.getEndIndex(); int tmpIndex = beginIndex; while (tmpIndex <= endTag.getBeginIndex()) { if ((tmpIndex == endTag.getBeginIndex()) || (contents.charAt(tmpIndex) == '\n')) { String line = contents.substring(beginIndex, tmpIndex).trim(); int colonIndex = line.indexOf(':'); if ((colonIndex > 0) && (imageNamespace.isPossibleName(line.substring(0, colonIndex)))) { String imageName = line.substring(colonIndex + 1); int pipeIndex = imageName.indexOf('|', colonIndex); if (pipeIndex > 0) { imageName = imageName.substring(0, pipeIndex); } int beginImageIndex = beginIndex; int endImageIndex = beginImageIndex + colonIndex + 1 + imageName.length(); addImage(imagesMap, imageName, beginImageIndex, endImageIndex); } beginIndex = tmpIndex + 1; } tmpIndex++; } } } // Analyze each title boolean result = false; for (List<Element> elements : imagesMap.values()) { if (elements.size() > 1) { if (errors == null) { return true; } result = true; for (int elementNum = 0; elementNum < elements.size(); elementNum++) { Element element = elements.get(elementNum); CheckErrorResult errorResult = createCheckErrorResult( analysis, element.beginIndex, element.endIndex, (elementNum == 0) ? ErrorLevel.WARNING : ErrorLevel.ERROR); errors.add(errorResult); } } } return result; } /** * Add an image in the map. * * @param imagesMap Map of images. * @param imageName Name of the image. * @param beginIndex Begin index of the image position. * @param endIndex End index of the image position. */ private void addImage( Map<String, List<Element>> imagesMap, String imageName, int beginIndex, int endIndex) { if ((imagesMap == null) || (imageName == null)) { return; } boolean shouldAdd = true; if (shouldAdd) { if (imageName.endsWith(".svg")) { shouldAdd = false; } } if (shouldAdd) { List<Element> elements = imagesMap.get(imageName); if (elements == null) { elements = new ArrayList<Element>(); imagesMap.put(imageName, elements); } elements.add(new Element(beginIndex, endIndex)); } } /** * @return Map of parameters (Name -> description). * @see org.wikipediacleaner.api.check.algorithm.CheckErrorAlgorithmBase#getParameters() */ @Override public Map<String, String> getParameters() { Map<String, String> parameters = super.getParameters(); parameters.put("min_size", GT._("The size below which images are not reported as duplicates")); return parameters; } /** * Bean for holding information about an image element. */ private static class Element { public final int beginIndex; public final int endIndex; public Element(int beginIndex, int endIndex) { this.beginIndex = beginIndex; this.endIndex = endIndex; } } }