/*
* WPCleaner: A tool to help on Wikipedia maintenance tasks.
* Copyright (C) 2013 Nicolas Vervelle
*
* See README.txt file for licensing information.
*/
package org.wikipediacleaner.api.check.algorithm;
import java.util.Collection;
import java.util.List;
import org.wikipediacleaner.api.check.CheckErrorResult;
import org.wikipediacleaner.api.data.PageAnalysis;
import org.wikipediacleaner.api.data.PageElementComment;
import org.wikipediacleaner.api.data.PageElementExternalLink;
import org.wikipediacleaner.api.data.PageElementTag;
import org.wikipediacleaner.api.data.PageElementTemplate;
import org.wikipediacleaner.i18n.GT;
/**
* Algorithm for analyzing error 62 of check wikipedia project.
* Error 62: URL containing no http://
*/
public class CheckErrorAlgorithm062 extends CheckErrorAlgorithmBase {
public CheckErrorAlgorithm062() {
super("URL containing no http://");
}
/**
* Analyze a page to check if errors are present.
*
* @param analysis Page analysis.
* @param errors Errors found in the page.
* @param onlyAutomatic True if analysis could be restricted to errors automatically fixed.
* @return Flag indicating if the error was found.
*/
@Override
public boolean analyze(
PageAnalysis analysis,
Collection<CheckErrorResult> errors, boolean onlyAutomatic) {
if (analysis == null) {
return false;
}
// Analyze every reference
boolean result = false;
List<PageElementTag> refs = analysis.getCompleteTags(PageElementTag.TAG_WIKI_REF);
if ((refs == null) || (refs.isEmpty())) {
return false;
}
for (PageElementTag ref : refs) {
if (!ref.isFullTag() && ref.isComplete()) {
result |= analyzeArea(
analysis, ref.getValueBeginIndex(), ref.getValueEndIndex(),
errors);
if ((errors == null) && result) {
return true;
}
}
}
return result;
}
/**
* Prefixes to look for.
*/
private final static String[] prefixes = {
"www."
};
/**
* Analyze an area for finding URL without http://
*
* @param analysis Page analysis.
* @param beginIndex Begin index of the area to analyze.
* @param endIndex End index of the area to analyze.
* @param errors Errors found in the page.
* @return Flag indicating if the error was found.
*/
public boolean analyzeArea(
PageAnalysis analysis, int beginIndex, int endIndex,
Collection<CheckErrorResult> errors) {
boolean result = false;
String text = analysis.getContents().substring(beginIndex, endIndex);
int index = 0;
while (index < text.length()) {
int nextIndex = index + 1;
for (String prefix : prefixes) {
if (text.startsWith(prefix, index) &&
(index + prefix.length() < text.length()) &&
(Character.isLetterOrDigit(text.charAt(index + prefix.length())))) {
boolean shouldCount = true;
int currentIndex = beginIndex + index;
if (shouldCount) {
// Check for external link
PageElementExternalLink link = analysis.isInExternalLink(currentIndex);
if (link != null) {
shouldCount = false;
}
}
if (shouldCount) {
// Check for comment
PageElementComment comment = analysis.isInComment(currentIndex);
if (comment != null) {
shouldCount = false;
}
}
if (shouldCount) {
// Check for template
PageElementTemplate template = analysis.isInTemplate(currentIndex);
if (template != null) {
for (int numParam = 0; numParam < template.getParameterCount(); numParam++) {
if (template.getParameterValueStartIndex(numParam) == currentIndex) {
shouldCount = false;
}
}
}
}
if (shouldCount) {
// Check characters before
int tmpIndex = index - 1;
boolean done = false;
while (shouldCount && !done && (tmpIndex > 0)) {
char currentChar = text.charAt(tmpIndex);
if ((currentChar == '/') && (text.charAt(tmpIndex - 1) == '/')) {
done = true;
shouldCount = false;
} else {
if (!Character.isLetter(currentChar) &&
!Character.isDigit(currentChar) &&
("/_-.".indexOf(currentChar) < 0)) {
done = true;
}
}
tmpIndex--;
}
}
if (shouldCount) {
if (errors == null) {
return true;
}
result = true;
CheckErrorResult errorResult = createCheckErrorResult(
analysis,
currentIndex, currentIndex + prefix.length());
errorResult.addReplacement(
"http://" + analysis.getContents().substring(currentIndex, currentIndex + prefix.length()),
GT._("Add {0}", "http://"));
errorResult.addReplacement("", GT._("Remove {0}", prefix));
errors.add(errorResult);
}
}
}
index = nextIndex;
}
return result;
}
}