/*
* WPCleaner: A tool to help on Wikipedia maintenance tasks.
* Copyright (C) 2013 Nicolas Vervelle
*
* See README.txt file for licensing information.
*/
package org.wikipediacleaner.api.check.algorithm;
import java.util.Collection;
import java.util.List;
import org.wikipediacleaner.api.check.CheckErrorResult;
import org.wikipediacleaner.api.data.PageAnalysis;
import org.wikipediacleaner.api.data.PageElementInternalLink;
import org.wikipediacleaner.api.data.PageElementTag;
/**
 * Algorithm for analyzing error 510 of check wikipedia project.
 * Error 510: Non working pipe trick
 *
 * Only internal links with an empty (but present) text are considered.
 * For each of them, three checks are tried in order and the first one
 * that matches is reported:
 * <ol>
 * <li>a namespace prefix or a trailing parenthesis / comma part, when the
 *     link is inside a surrounding {@code PageElementTag.TAG_WIKI_REF} tag;</li>
 * <li>a target starting with a slash;</li>
 * <li>a target containing a non-empty section part after {@code #}.</li>
 * </ol>
 */
public class CheckErrorAlgorithm510 extends CheckErrorAlgorithmBase {

  public CheckErrorAlgorithm510() {
    super("Non working pipe trick");
  }

  /**
   * Analyze a page to check if errors are present.
   *
   * @param analysis Page analysis.
   * @param errors Errors found in the page. When <code>null</code>, the method
   *               returns as soon as a first error is detected.
   * @param onlyAutomatic True if analysis could be restricted to errors automatically fixed
   *                      (not used by this algorithm).
   * @return Flag indicating if the error was found.
   */
  @Override
  public boolean analyze(
      PageAnalysis analysis,
      Collection<CheckErrorResult> errors, boolean onlyAutomatic) {
    if (analysis == null) {
      return false;
    }
    List<PageElementInternalLink> links = analysis.getInternalLinks();
    if (links == null) {
      return false;
    }

    // Analyze each internal link
    boolean result = false;
    for (PageElementInternalLink link : links) {
      String text = link.getText();
      if ((text == null) || (text.length() != 0) || (link.getFullLink() == null)) {
        continue;
      }
      String target = link.getFullLink().trim();

      // Short-circuit evaluation: a later check runs only if the earlier ones found nothing
      boolean errorFound =
          checkPipeTrickInReference(analysis, link, target, errors) ||
          checkSlashTrick(analysis, link, target, errors) ||
          checkSectionLink(analysis, link, target, errors);
      if (errorFound) {
        if (errors == null) {
          return true;
        }
        result = true;
      }
    }
    return result;
  }

  /**
   * Check for a pipe trick (namespace prefix, trailing parenthesis or comma part)
   * on a link located inside a reference tag.
   *
   * @param analysis Page analysis.
   * @param link Internal link with an empty text.
   * @param target Trimmed full target of the link.
   * @param errors Errors found in the page (nothing is reported if <code>null</code>).
   * @return True if the error was detected.
   */
  private boolean checkPipeTrickInReference(
      PageAnalysis analysis, PageElementInternalLink link,
      String target, Collection<CheckErrorResult> errors) {
    if (analysis.getSurroundingTag(PageElementTag.TAG_WIKI_REF, link.getBeginIndex()) == null) {
      return false;
    }

    // Check for namespace at the beginning.
    // A colon at index 0 is not treated as a namespace separator.
    int beginIndex = 0;
    if (target.length() > 1) {
      int colonIndex = target.indexOf(':');
      if (colonIndex > 0) {
        beginIndex = colonIndex + 1;
      }
    }

    // Check for parenthesis or commas (remove the end part)
    int endIndex = target.length();
    if (endIndex > 0) {
      if (target.charAt(endIndex - 1) == ')') {
        int parenthesisIndex = target.lastIndexOf('(');
        if (parenthesisIndex > 0) {
          endIndex = parenthesisIndex;
        }
      }
      // The comma is only looked for when no parenthesis part was removed
      if (endIndex == target.length()) {
        int commaIndex = target.indexOf(',');
        if (commaIndex > 0) {
          endIndex = commaIndex;
        }
      }
    }

    // Nothing to strip: not an error for this check
    if ((beginIndex == 0) && (endIndex == target.length())) {
      return false;
    }

    if (errors != null) {
      // The namespace separator may be located in the removed end part
      if (beginIndex >= endIndex) {
        beginIndex = 0;
      }
      // Trim so that "Foo (bar)" gives the label "Foo" and not "Foo "
      String label = target.substring(beginIndex, endIndex).trim();
      String withLabel = (label.length() > 0) ?
          PageElementInternalLink.createInternalLink(target, label) : null;
      report(
          analysis, link, errors, withLabel,
          PageElementInternalLink.createInternalLink(target, null));
    }
    return true;
  }

  /**
   * Check for an incorrect slash trick: target of at least two characters
   * starting with a slash.
   *
   * @param analysis Page analysis.
   * @param link Internal link with an empty text.
   * @param target Trimmed full target of the link.
   * @param errors Errors found in the page (nothing is reported if <code>null</code>).
   * @return True if the error was detected.
   */
  private boolean checkSlashTrick(
      PageAnalysis analysis, PageElementInternalLink link,
      String target, Collection<CheckErrorResult> errors) {
    if ((target.length() <= 1) || (target.charAt(0) != '/')) {
      return false;
    }
    if (errors != null) {
      // Do not append a second slash when the target already ends with one:
      // in that case the plain link is the only suggestion
      boolean endsWithSlash = (target.charAt(target.length() - 1) == '/');
      String withTrailingSlash = endsWithSlash ?
          null : PageElementInternalLink.createInternalLink(target + "/", null);
      report(
          analysis, link, errors, withTrailingSlash,
          PageElementInternalLink.createInternalLink(target, null));
    }
    return true;
  }

  /**
   * Check for a link to a section: target containing a <code>#</code>
   * followed by at least one character.
   *
   * @param analysis Page analysis.
   * @param link Internal link with an empty text.
   * @param target Trimmed full target of the link.
   * @param errors Errors found in the page (nothing is reported if <code>null</code>).
   * @return True if the error was detected.
   */
  private boolean checkSectionLink(
      PageAnalysis analysis, PageElementInternalLink link,
      String target, Collection<CheckErrorResult> errors) {
    int hashIndex = target.indexOf('#');
    if ((hashIndex < 0) || (hashIndex >= target.length() - 1)) {
      return false;
    }
    if (errors != null) {
      // Suggest the section name as the link text
      report(
          analysis, link, errors,
          PageElementInternalLink.createInternalLink(target, target.substring(hashIndex + 1)),
          PageElementInternalLink.createInternalLink(target, null));
    }
    return true;
  }

  /**
   * Create an error covering the link and add it to the list of errors.
   *
   * @param analysis Page analysis.
   * @param link Internal link in error.
   * @param errors Errors found in the page (must not be <code>null</code>).
   * @param firstReplacement First suggested replacement (skipped if <code>null</code>).
   * @param secondReplacement Second suggested replacement.
   */
  private void report(
      PageAnalysis analysis, PageElementInternalLink link,
      Collection<CheckErrorResult> errors,
      String firstReplacement, String secondReplacement) {
    CheckErrorResult errorResult = createCheckErrorResult(
        analysis, link.getBeginIndex(), link.getEndIndex());
    if (firstReplacement != null) {
      errorResult.addReplacement(firstReplacement);
    }
    errorResult.addReplacement(secondReplacement);
    errors.add(errorResult);
  }
}