/*
 *  WPCleaner: A tool to help on Wikipedia maintenance tasks.
 *  Copyright (C) 2013  Nicolas Vervelle
 *
 *  See README.txt file for licensing information.
 */

package org.wikipediacleaner.api.check.algorithm;

import java.util.Collection;
import java.util.List;

import org.wikipediacleaner.api.check.CheckErrorResult;
import org.wikipediacleaner.api.data.PageAnalysis;
import org.wikipediacleaner.api.data.PageElementExternalLink;


/**
 * Algorithm for analyzing error 93 of check wikipedia project.
 * Error 93: External link with double http://
 * <p>
 * Two situations are reported:
 * <ul>
 * <li>the link destination itself starts with two consecutive protocol
 *     prefixes (for example <code>http://http://example.com</code>);</li>
 * <li>a link written without square brackets is immediately preceded in the
 *     page text by a protocol prefix.</li>
 * </ul>
 */
public class CheckErrorAlgorithm093 extends CheckErrorAlgorithmBase {

  /**
   * Protocol prefixes (complete or truncated) that are looked for.
   * The order matters: the first matching entry wins, so for a given
   * protocol the longest form is listed before its truncated forms.
   */
  private static final String[] POSSIBLE_PREFIXES = {
    "http://",
    "http:/",
    "http:",
    "https://",
    "https:/",
    "https:",
  };

  public CheckErrorAlgorithm093() {
    super("External link with double http://");
  }

  /**
   * Analyze a page to check if errors are present.
   *
   * @param analysis Page analysis.
   * @param errors Errors found in the page; when <code>null</code>, the method
   *               returns as soon as a first error is detected.
   * @param onlyAutomatic True if analysis could be restricted to errors
   *                      automatically fixed (not used by this algorithm).
   * @return Flag indicating if the error was found.
   */
  @Override
  public boolean analyze(
      PageAnalysis analysis,
      Collection<CheckErrorResult> errors, boolean onlyAutomatic) {
    if (analysis == null) {
      return false;
    }

    // Analyze each external link
    List<PageElementExternalLink> links = analysis.getExternalLinks();
    boolean result = false;
    String contents = analysis.getContents();
    for (PageElementExternalLink link : links) {
      int beginIndex = link.getBeginIndex();
      String dest = link.getLink();
      String prefix1 = null;
      String prefix2 = null;
      String withoutPrefix = null;

      // Check if there's a double prefix inside the link.
      // Every prefix matching the start of the link is tried in turn,
      // until one of them is directly followed by a second prefix.
      for (String candidate : POSSIBLE_PREFIXES) {
        if (hasPrefixAt(dest, 0, candidate)) {
          String following = findPrefixAt(dest, candidate.length());
          if (following != null) {
            prefix1 = candidate;
            prefix2 = following;
            withoutPrefix = dest.substring(prefix1.length() + prefix2.length());
            break;
          }
        }
      }

      // Check if there's a second prefix before the link
      // (only for links written without square brackets).
      if ((prefix1 == null) && !link.hasSquare()) {
        String inside = findPrefixAt(dest, 0);
        String before = findPrefixBefore(contents, beginIndex);
        if ((before != null) && (inside != null)) {
          prefix1 = before;
          prefix2 = inside;
          // The reported area is extended to cover the leading prefix
          beginIndex -= prefix1.length();
          withoutPrefix = dest.substring(prefix2.length());
        }
      }

      // Raise error
      if ((prefix1 != null) && (prefix2 != null)) {
        if (errors == null) {
          return true;
        }
        result = true;
        CheckErrorResult errorResult = createCheckErrorResult(
            analysis, beginIndex, link.getEndIndex());

        // A prefix is only suggested when it is a complete one ("...//").
        if (prefix2.endsWith("//")) {
          addReplacement(errorResult, link, prefix2 + withoutPrefix);
        }
        // The first prefix is also suggested when it differs from the second.
        if (!prefix1.equalsIgnoreCase(prefix2) && prefix1.endsWith("//")) {
          addReplacement(errorResult, link, prefix1 + withoutPrefix);
        }
        errors.add(errorResult);
      }
    }

    return result;
  }

  /**
   * Test if a prefix is present at a given position, ignoring case.
   * The prefix must be followed by at least one more character.
   *
   * @param text Text to look into.
   * @param offset Position in the text where the prefix should start.
   * @param prefix Prefix to look for.
   * @return True if the prefix is found at the given position.
   */
  private static boolean hasPrefixAt(String text, int offset, String prefix) {
    return (offset + prefix.length() < text.length()) &&
           text.regionMatches(true, offset, prefix, 0, prefix.length());
  }

  /**
   * Find the first known prefix present at a given position.
   *
   * @param text Text to look into.
   * @param offset Position in the text where the prefix should start.
   * @return First matching prefix, or <code>null</code> if there is none.
   */
  private static String findPrefixAt(String text, int offset) {
    for (String prefix : POSSIBLE_PREFIXES) {
      if (hasPrefixAt(text, offset, prefix)) {
        return prefix;
      }
    }
    return null;
  }

  /**
   * Find the first known prefix ending exactly at a given position.
   *
   * @param text Text to look into.
   * @param endIndex Position in the text just after the expected prefix.
   * @return First matching prefix, or <code>null</code> if there is none.
   */
  private static String findPrefixBefore(String text, int endIndex) {
    for (String prefix : POSSIBLE_PREFIXES) {
      if ((endIndex >= prefix.length()) &&
          text.regionMatches(true, endIndex - prefix.length(), prefix, 0, prefix.length())) {
        return prefix;
      }
    }
    return null;
  }

  /**
   * Add a replacement suggestion, keeping the original form of the link
   * (with or without square brackets).
   *
   * @param errorResult Error to which the suggestion is added.
   * @param link Link being fixed.
   * @param newDest New destination for the link.
   */
  private static void addReplacement(
      CheckErrorResult errorResult,
      PageElementExternalLink link, String newDest) {
    if (link.hasSquare()) {
      errorResult.addReplacement(PageElementExternalLink.createExternalLink(
          newDest, link.getText()));
    } else {
      errorResult.addReplacement(newDest);
    }
  }
}