/*
* WPCleaner: A tool to help on Wikipedia maintenance tasks.
* Copyright (C) 2013 Nicolas Vervelle
*
* See README.txt file for licensing information.
*/
package org.wikipediacleaner.api.check.algorithm;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import org.wikipediacleaner.api.check.CheckErrorResult;
import org.wikipediacleaner.api.check.CheckErrorResult.ErrorLevel;
import org.wikipediacleaner.api.constants.WPCConfiguration;
import org.wikipediacleaner.api.constants.WPCConfigurationStringList;
import org.wikipediacleaner.api.data.Namespace;
import org.wikipediacleaner.api.data.Page;
import org.wikipediacleaner.api.data.PageAnalysis;
import org.wikipediacleaner.api.data.PageElement;
import org.wikipediacleaner.api.data.PageElementExternalLink;
import org.wikipediacleaner.api.data.PageElementFunction;
import org.wikipediacleaner.api.data.PageElementISBN;
import org.wikipediacleaner.api.data.PageElementInternalLink;
import org.wikipediacleaner.api.data.PageElementInterwikiLink;
import org.wikipediacleaner.api.data.PageElementTag;
import org.wikipediacleaner.api.data.PageElementTemplate;
import org.wikipediacleaner.api.data.PageElementTemplate.Parameter;
import org.wikipediacleaner.i18n.GT;
/**
 * Algorithm for analyzing error 69 of check wikipedia project.
 * Error 69: ISBN wrong syntax
 * <p>
 * The analysis is made of three independent passes over the page:
 * <ol>
 * <li>each ISBN returned by the page analysis (wrong syntax, ISBN used as the
 *     text of a link to the Special namespace, ISBN inside the text of an
 *     external link);</li>
 * <li>internal links displayed as "ISBN" and followed by something that looks
 *     like an ISBN value;</li>
 * <li>ISBN written inside {@code <nowiki>} tags.</li>
 * </ol>
 */
public class CheckErrorAlgorithm069 extends CheckErrorAlgorithmISBN {

  /**
   * Create the algorithm with its default name.
   */
  public CheckErrorAlgorithm069() {
    super("ISBN wrong syntax");
  }

  /** List of strings that could be before an ISBN in {@code <nowiki>}. */
  private final static String[] EXTEND_BEFORE_NOWIKI = {
    "<nowiki>",
    "<small>",
    "(",
  };

  /** List of strings that could be after an ISBN in {@code <nowiki>}. */
  private final static String[] EXTEND_AFTER_NOWIKI = {
    "</nowiki>",
    "</small>",
    ")",
  };

  /**
   * List of strings that could be between "ISBN" and its value.
   * <p>
   * NOTE(review): these entries are only tested when the current character is
   * neither a space nor a non-breaking space (see
   * {@link #analyzeLinkedPrefixes(PageAnalysis, Collection)}), so as written
   * they can never match. They look like they were meant to be multi-character
   * separators (HTML entities?) - TODO confirm against the upstream source.
   */
  private final static String[] FIRST_SEPARATOR = {
    " ",
    " ",
  };

  /**
   * Analyze a page to check if errors are present.
   *
   * @param analysis Page analysis.
   * @param errors Errors found in the page. If <code>null</code>, the method
   *               returns as soon as a first error is detected.
   * @param onlyAutomatic True if analysis could be restricted to errors automatically fixed.
   *                      (Not used by this algorithm.)
   * @return Flag indicating if the error was found.
   */
  @Override
  public boolean analyze(
      PageAnalysis analysis,
      Collection<CheckErrorResult> errors, boolean onlyAutomatic) {
    if (analysis == null) {
      return false;
    }

    // Each pass stops at its first detection when no error list is provided,
    // so there is no need to run the following passes in that case.
    boolean result = analyzeISBNs(analysis, errors);
    if (result && (errors == null)) {
      return true;
    }
    result |= analyzeLinkedPrefixes(analysis, errors);
    if (result && (errors == null)) {
      return true;
    }
    result |= analyzeNowikiTags(analysis, errors);
    return result;
  }

  /**
   * Analyze each ISBN found by the page analysis.
   *
   * @param analysis Page analysis.
   * @param errors Errors found in the page (can be <code>null</code>).
   * @return Flag indicating if the error was found.
   */
  private boolean analyzeISBNs(
      PageAnalysis analysis,
      Collection<CheckErrorResult> errors) {
    List<PageElementISBN> isbns = analysis.getISBNs();
    if (isbns == null) {
      return false;
    }

    boolean result = false;
    for (PageElementISBN isbn : isbns) {

      // Candidate for an error: flagged as valid but not as correct
      boolean isError = false;
      if (!isbn.isCorrect() && isbn.isValid()) {
        isError = true;
      }

      // Exclude special configured values for ISBN:
      // each configured entry is { template name, parameter name, raw value }
      if (isError && isbn.isTemplateParameter()) {
        WPCConfiguration config = analysis.getWPCConfiguration();
        List<String[]> specialValues = config.getStringArrayList(
            WPCConfigurationStringList.ISBN_SPECIAL_VALUES);
        if ((specialValues != null) && !specialValues.isEmpty()) {
          PageElementTemplate template = analysis.isInTemplate(isbn.getBeginIndex());
          if (template != null) {
            Parameter param = template.getParameterAtIndex(isbn.getBeginIndex());
            if ((param != null) &&
                (param.getName() != null) &&
                (param.getName().trim().length() > 0)) {
              String name = param.getName().trim();
              for (String[] specialValue : specialValues) {
                if ((specialValue.length > 2) &&
                    (Page.areSameTitle(template.getTemplateName(), specialValue[0])) &&
                    (name.equals(specialValue[1])) &&
                    (isbn.getISBNNotTrimmed().equals(specialValue[2]))) {
                  isError = false;
                }
              }
            }
          }
        }
      }

      // Exclude parameters in templates: in the template namespace, ignore
      // the ISBN when a function begins inside the same template parameter
      if (isError &&
          isbn.isTemplateParameter() &&
          analysis.isInNamespace(Namespace.TEMPLATE)) {
        PageElementTemplate template = analysis.isInTemplate(isbn.getBeginIndex());
        if (template != null) {
          Parameter param = template.getParameterAtIndex(isbn.getBeginIndex());
          if (param != null) {
            List<PageElementFunction> functions = analysis.getFunctions();
            if (functions != null) {
              for (PageElementFunction function : functions) {
                int functionIndex = function.getBeginIndex();
                if ((template == analysis.isInTemplate(functionIndex)) &&
                    (param == template.getParameterAtIndex(functionIndex))) {
                  isError = false;
                }
              }
            }
          }
        }
      }

      // Report error
      boolean reported = false;
      if (isError) {
        if (errors == null) {
          return true;
        }
        result = true;
        reported = true;

        // Check for potential extra characters around:
        // matching pairs of parentheses or square brackets...
        int beginIndex = isbn.getBeginIndex();
        int endIndex = isbn.getEndIndex();
        String contents = analysis.getContents();
        boolean tryAgain = true;
        while (tryAgain) {
          tryAgain = false;
          if ((beginIndex > 0) && (endIndex < contents.length())) {
            char previousChar = contents.charAt(beginIndex - 1);
            char nextChar = contents.charAt(endIndex);
            if (((previousChar == '(') && (nextChar == ')')) ||
                ((previousChar == '[') && (nextChar == ']'))) {
              beginIndex--;
              endIndex++;
              tryAgain = true;
            }
          }
        }

        // ... then one enclosing <small>...</small> pair
        final String SMALL_OPEN = "<small>";
        final String SMALL_CLOSE = "</small>";
        if ((beginIndex >= SMALL_OPEN.length()) && (endIndex < contents.length())) {
          if (contents.startsWith(SMALL_OPEN, beginIndex - SMALL_OPEN.length()) &&
              contents.startsWith(SMALL_CLOSE, endIndex)) {
            beginIndex -= SMALL_OPEN.length();
            endIndex += SMALL_CLOSE.length();
          }
        }

        // When the area has been extended, the error covers the extended area
        // and keeps the error level computed for the ISBN alone
        CheckErrorResult errorResult = createCheckErrorResult(analysis, isbn, false);
        String prefix = null;
        String suffix = null;
        if ((beginIndex < isbn.getBeginIndex()) && (endIndex > isbn.getEndIndex())) {
          prefix = contents.substring(beginIndex, isbn.getBeginIndex());
          suffix = contents.substring(isbn.getEndIndex(), endIndex);
          errorResult = createCheckErrorResult(
              analysis, beginIndex, endIndex, errorResult.getErrorLevel());
        }
        addSuggestions(analysis, errorResult, isbn);
        errors.add(errorResult);

        // Suggest each corrected ISBN that differs from the current text,
        // first with the surrounding characters kept, then without them
        List<String> replacements = isbn.getCorrectISBN();
        if (replacements != null) {
          String currentText = contents.substring(isbn.getBeginIndex(), isbn.getEndIndex());
          for (String replacement : replacements) {
            if (!replacement.equals(currentText)) {
              if ((prefix != null) && (suffix != null)) {
                errorResult.addReplacement(prefix + replacement + suffix);
              }
              errorResult.addReplacement(replacement);
            }
          }
        }
      }

      // Analyze to find links to Special/BookSources:
      // the ISBN is the exact text of an internal or interwiki link
      if (!reported && !isbn.isTemplateParameter()) {
        PageElement element = null;
        ErrorLevel level = ErrorLevel.CORRECT;
        String isbnText = analysis.getContents().substring(isbn.getBeginIndex(), isbn.getEndIndex());
        PageElementInternalLink link = analysis.isInInternalLink(isbn.getBeginIndex());
        if ((link != null) && (isbnText.equals(link.getText()))) {
          level = isSpecialBookSources(analysis, link.getLink());
          if (level != ErrorLevel.CORRECT) {
            element = link;
          }
        }
        if (element == null) {
          PageElementInterwikiLink iwLink = analysis.isInInterwikiLink(isbn.getBeginIndex());
          if ((iwLink != null) && (isbnText.equals(iwLink.getText()))) {
            level = isSpecialBookSources(analysis, iwLink.getLink());
            if (level != ErrorLevel.CORRECT) {
              element = iwLink;
            }
          }
        }
        if (element != null) {
          if (errors == null) {
            return true;
          }
          result = true;
          reported = true;
          CheckErrorResult errorResult = createCheckErrorResult(
              analysis, element.getBeginIndex(), element.getEndIndex(), level);
          List<String> replacements = isbn.getCorrectISBN();
          if (replacements != null) {
            for (String replacement : replacements) {
              errorResult.addReplacement(replacement);
            }
          }
          errors.add(errorResult);
        }
      }

      // Analyze if ISBN is inside an external link:
      // only links with square brackets, and only when the ISBN is in the text part
      if (!reported && !isbn.isTemplateParameter()) {
        PageElementExternalLink link = analysis.isInExternalLink(isbn.getBeginIndex());
        if ((link != null) && link.hasSquare() &&
            (isbn.getBeginIndex() >= link.getBeginIndex() + link.getTextOffset()) &&
            (link.getText() != null)) {
          if (errors == null) {
            return true;
          }
          result = true;
          reported = true;

          CheckErrorResult errorResult = createCheckErrorResult(
              analysis, link.getBeginIndex(), link.getEndIndex());

          // Skip separators and opening parenthesis located just before the ISBN
          int beginIndex = isbn.getBeginIndex();
          int realEndIndex = isbn.getEndIndex();
          String contents = analysis.getContents();
          while ((beginIndex > 0) &&
                 (" ,;.(".indexOf(contents.charAt(beginIndex - 1)) >= 0)) {
            beginIndex--;
          }

          // If the ISBN value is repeated right after (only commas and spaces
          // in between), the repetition is included in the replaced area
          if (realEndIndex < link.getEndIndex()) {
            int tmpIndex = realEndIndex;
            while ((tmpIndex < link.getEndIndex()) &&
                   (", ".indexOf(contents.charAt(tmpIndex)) >= 0)) {
              tmpIndex++;
            }
            if ((tmpIndex < link.getEndIndex()) &&
                (contents.startsWith(isbn.getISBN(), tmpIndex))) {
              realEndIndex = tmpIndex + isbn.getISBN().length();
            }
          }

          // Skip closing parenthesis located just after the ISBN
          int endIndex = realEndIndex;
          while ((endIndex < link.getEndIndex()) &&
                 (")".indexOf(contents.charAt(endIndex)) >= 0)) {
            endIndex++;
          }

          if (beginIndex > link.getBeginIndex() + link.getTextOffset()) {
            // There is some link text before the ISBN:
            // suggest to move the ISBN after the end of the link
            String replacementPrefix =
                contents.substring(link.getBeginIndex(), beginIndex) +
                contents.substring(endIndex, link.getEndIndex()) +
                contents.substring(beginIndex, isbn.getBeginIndex());
            // Abbreviated text displayed for the suggestion
            String textPrefix =
                contents.substring(link.getBeginIndex(), link.getBeginIndex() + 7) +
                "...]" +
                contents.substring(beginIndex, isbn.getBeginIndex());
            List<String> replacements = isbn.getCorrectISBN();
            if (replacements != null) {
              for (String replacement : replacements) {
                errorResult.addReplacement(
                    replacementPrefix + replacement + contents.substring(realEndIndex, endIndex),
                    textPrefix + replacement + contents.substring(realEndIndex, endIndex));
              }
            }
            errorResult.addReplacement(
                replacementPrefix + contents.substring(isbn.getBeginIndex(), isbn.getEndIndex()),
                textPrefix + contents.substring(isbn.getBeginIndex(), isbn.getEndIndex()));

            // Some text remains after the ISBN: suggest also to close the link
            // before the ISBN and to keep everything that follows outside of it
            if (endIndex < link.getEndIndex()) {
              replacementPrefix =
                  contents.substring(link.getBeginIndex(), beginIndex) +
                  "]" +
                  contents.substring(beginIndex, isbn.getBeginIndex());
              if (replacements != null) {
                for (String replacement : replacements) {
                  errorResult.addReplacement(
                      replacementPrefix + replacement + contents.substring(isbn.getEndIndex(), link.getEndIndex() - 1),
                      textPrefix + replacement + contents.substring(isbn.getEndIndex(), link.getEndIndex() - 1));
                }
              }
              errorResult.addReplacement(
                  replacementPrefix + contents.substring(isbn.getBeginIndex(), link.getEndIndex() - 1),
                  textPrefix + contents.substring(isbn.getBeginIndex(), link.getEndIndex() - 1));
            }
          } else if (endIndex >= link.getEndIndex() - 1) {
            // Nothing else in the link text: suggest to replace the link by the ISBN
            List<String> replacements = isbn.getCorrectISBN();
            if (replacements != null) {
              for (String replacement : replacements) {
                errorResult.addReplacement(replacement);
              }
            }
            errorResult.addReplacement(contents.substring(isbn.getBeginIndex(), isbn.getEndIndex()));
          }
          errors.add(errorResult);
        }
      }
    }
    return result;
  }

  /**
   * Report ISBN like [[International Standard Book Number|ISBN]] 978-0321637734.
   *
   * @param analysis Page analysis.
   * @param errors Errors found in the page (can be <code>null</code>).
   * @return Flag indicating if the error was found.
   */
  private boolean analyzeLinkedPrefixes(
      PageAnalysis analysis,
      Collection<CheckErrorResult> errors) {
    List<PageElementInternalLink> links = analysis.getInternalLinks();
    if (links == null) {
      return false;
    }

    boolean result = false;
    String contents = analysis.getContents();
    for (PageElementInternalLink link : links) {
      if (PageElementISBN.ISBN_PREFIX.equals(link.getDisplayedText().trim())) {

        // Skip everything that can separate the prefix from the value
        int tmpIndex = link.getEndIndex();
        boolean shouldContinue = true;
        while (shouldContinue) {
          shouldContinue = false;
          if (tmpIndex < contents.length()) {
            if (" \u00A0".indexOf(contents.charAt(tmpIndex)) >= 0) {
              tmpIndex++;
              shouldContinue = true;
            } else {
              for (String separator : FIRST_SEPARATOR) {
                if (contents.startsWith(separator, tmpIndex)) {
                  tmpIndex += separator.length();
                  shouldContinue = true;
                }
              }
            }
          }
        }

        boolean isbnFound = false;
        int beginISBN = tmpIndex;
        String suffix = null;
        if (tmpIndex < contents.length()) {

          // The value can itself be inside an internal or external link:
          // in that case, move to the beginning of the link text
          PageElementInternalLink nextLink = null;
          PageElementExternalLink nextLinkE = null;
          if (contents.charAt(tmpIndex) == '[') {
            nextLink = analysis.isInInternalLink(tmpIndex);
            if (nextLink != null) {
              tmpIndex += 2;
              int offset = nextLink.getTextOffset();
              if (offset > 0) {
                tmpIndex += offset;
              }
            } else {
              nextLinkE = analysis.isInExternalLink(tmpIndex);
              if (nextLinkE != null) {
                tmpIndex += 1;
                int offset = nextLinkE.getTextOffset();
                if (offset > 0) {
                  tmpIndex += offset;
                }
              }
            }
          }

          // Something is considered an ISBN value if it starts with a possible character
          if ((tmpIndex < contents.length()) &&
              (PageElementISBN.POSSIBLE_CHARACTERS.indexOf(contents.charAt(tmpIndex)) >= 0)) {
            isbnFound = true;
          }

          // Compute the end of the area and the text to keep after the prefix
          if (nextLink != null) {
            suffix = nextLink.getDisplayedText();
            tmpIndex = nextLink.getEndIndex();
          } else if (nextLinkE != null) {
            suffix = nextLinkE.getDisplayedText();
            tmpIndex = nextLinkE.getEndIndex();
          } else {
            while ((tmpIndex < contents.length()) &&
                   ((PageElementISBN.POSSIBLE_CHARACTERS.indexOf(contents.charAt(tmpIndex)) >= 0) ||
                    (PageElementISBN.EXTRA_CHARACTERS.indexOf(contents.charAt(tmpIndex)) >= 0 ))) {
              tmpIndex++;
            }
            suffix = contents.substring(beginISBN, tmpIndex);
          }
        }

        // Report error
        if (isbnFound) {
          if (errors == null) {
            return true;
          }
          result = true;
          CheckErrorResult errorResult = createCheckErrorResult(
              analysis, link.getBeginIndex(), tmpIndex);
          errorResult.addReplacement(
              PageElementISBN.ISBN_PREFIX + " " + suffix);
          errors.add(errorResult);
        }
      }
    }
    return result;
  }

  /**
   * Report ISBN inside {@code <nowiki>} tags.
   *
   * @param analysis Page analysis.
   * @param errors Errors found in the page (can be <code>null</code>).
   * @return Flag indicating if the error was found.
   */
  private boolean analyzeNowikiTags(
      PageAnalysis analysis,
      Collection<CheckErrorResult> errors) {
    List<PageElementTag> nowikiTags = analysis.getCompleteTags(PageElementTag.TAG_WIKI_NOWIKI);
    if (nowikiTags == null) {
      return false;
    }

    boolean result = false;
    String contents = analysis.getContents();
    for (PageElementTag nowikiTag : nowikiTags) {
      if (!nowikiTag.isFullTag() && nowikiTag.isComplete()) {
        String nowikiContent = contents.substring(
            nowikiTag.getValueBeginIndex(), nowikiTag.getValueEndIndex());

        // Indexes named "index" / "tmpIndex" are relative to nowikiContent,
        // "beginIndex" / "endIndex" are relative to the page contents
        int index = 0;
        while (index < nowikiContent.length()) {
          if (nowikiContent.startsWith(PageElementISBN.ISBN_PREFIX, index)) {

            // Separator between the prefix and the value
            int tmpIndex = index + PageElementISBN.ISBN_PREFIX.length();
            boolean hasSeparator = false;
            while ((tmpIndex < nowikiContent.length()) &&
                   (PageElementISBN.EXTRA_CHARACTERS.indexOf(nowikiContent.charAt(tmpIndex)) >= 0)) {
              hasSeparator = true;
              tmpIndex++;
            }

            // Value: groups of possible characters, optionally separated by
            // extra characters. Trailing extra characters are not included.
            boolean hasCharacter = false;
            int indexCharacter = tmpIndex;
            boolean shouldContinue = true;
            while (shouldContinue) {
              int tmpIndex2 = tmpIndex;
              shouldContinue = false;
              while ((tmpIndex2 < nowikiContent.length()) &&
                     (PageElementISBN.EXTRA_CHARACTERS.indexOf(nowikiContent.charAt(tmpIndex2)) >= 0)) {
                tmpIndex2++;
              }
              while ((tmpIndex2 < nowikiContent.length()) &&
                     (PageElementISBN.POSSIBLE_CHARACTERS.indexOf(nowikiContent.charAt(tmpIndex2)) >= 0)) {
                hasCharacter = true;
                shouldContinue = true;
                tmpIndex2++;
              }
              if (shouldContinue) {
                tmpIndex = tmpIndex2;
              }
            }

            if (hasSeparator && hasCharacter) {
              if (errors == null) {
                return true;
              }
              result = true;

              // Try to extend area
              int beginIndex = nowikiTag.getValueBeginIndex() + index;
              boolean extensionFound = false;
              do {
                extensionFound = false;
                for (String before : EXTEND_BEFORE_NOWIKI) {
                  if ((beginIndex >= before.length()) &&
                      (contents.startsWith(before, beginIndex - before.length()))) {
                    extensionFound = true;
                    beginIndex -= before.length();
                  }
                }
              } while (extensionFound);
              int endIndex = nowikiTag.getValueBeginIndex() + tmpIndex;
              do {
                extensionFound = false;
                for (String after : EXTEND_AFTER_NOWIKI) {
                  if ((endIndex < contents.length()) &&
                      (contents.startsWith(after, endIndex))) {
                    extensionFound = true;
                    endIndex += after.length();
                  }
                }
              } while (extensionFound);

              // Report error
              CheckErrorResult errorResult = createCheckErrorResult(
                  analysis, beginIndex, endIndex);

              // Replacements are suggested only when the area covers the whole tag
              if ((beginIndex <= nowikiTag.getCompleteBeginIndex()) &&
                  (endIndex >= nowikiTag.getCompleteEndIndex())) {

                // Raw ISBN without the tags
                errorResult.addReplacement(contents.substring(
                    nowikiTag.getValueBeginIndex() + index,
                    nowikiTag.getValueBeginIndex() + tmpIndex));

                // Configured templates: { template name, comma separated parameters, suggested flag }
                List<String[]> isbnTemplates = analysis.getWPCConfiguration().getStringArrayList(
                    WPCConfigurationStringList.ISBN_TEMPLATES);
                if (isbnTemplates != null) {
                  for (String[] isbnTemplate : isbnTemplates) {
                    if (isbnTemplate.length > 2) {
                      String templateName = isbnTemplate[0];
                      String[] params = isbnTemplate[1].split(",");
                      Boolean suggested = Boolean.valueOf(isbnTemplate[2]);
                      if ((params.length > 0) && (Boolean.TRUE.equals(suggested))) {
                        StringBuilder replacement = new StringBuilder();
                        replacement.append("{{");
                        replacement.append(templateName);
                        replacement.append("|");
                        // The parameter name is omitted when it is "1"
                        if (!"1".equals(params[0])) {
                          replacement.append(params[0]);
                          replacement.append("=");
                        }
                        replacement.append(nowikiContent.substring(indexCharacter, tmpIndex));
                        replacement.append("}}");
                        errorResult.addReplacement(replacement.toString());
                      }
                    }
                  }
                }
              }
              errors.add(errorResult);
              index = tmpIndex;
            } else {
              index += PageElementISBN.ISBN_PREFIX.length();
            }
          } else {
            index++;
          }
        }
      }
    }
    return result;
  }

  /**
   * Tell if a link targets the Special namespace, and the book sources page in particular.
   *
   * @param analysis Page analysis.
   * @param link Link destination (a leading colon is ignored).
   * @return Error level:
   *         {@link ErrorLevel#ERROR} if the prefix is a possible name of the
   *         Special namespace and the page name starts with "BookSources",
   *         {@link ErrorLevel#WARNING} for any other page in that namespace,
   *         {@link ErrorLevel#CORRECT} otherwise.
   */
  private ErrorLevel isSpecialBookSources(PageAnalysis analysis, String link) {
    if (link == null) {
      return ErrorLevel.CORRECT;
    }

    // Ignore a colon at the beginning of the link
    int colonIndex = link.indexOf(':');
    if (colonIndex == 0) {
      link = link.substring(1);
      colonIndex = link.indexOf(':');
    }

    if (colonIndex > 0) {
      Namespace special = analysis.getWikiConfiguration().getNamespace(Namespace.SPECIAL);
      String prefix = link.substring(0, colonIndex);
      if ((special != null) && (special.isPossibleName(prefix))) {
        if (link.startsWith("BookSources", colonIndex + 1)) {
          return ErrorLevel.ERROR;
        }
        return ErrorLevel.WARNING;
      }
    }
    return ErrorLevel.CORRECT;
  }

  /**
   * @param isbn ISBN number.
   * @return Reason for the error: value of the "reason" property,
   *         or <code>null</code> if there is no ISBN or no such property.
   */
  @Override
  public String getReason(PageElementISBN isbn) {
    if (isbn == null) {
      return null;
    }
    return getSpecificProperty("reason", true, true, false);
  }

  /**
   * Return the parameters used to configure the algorithm.
   *
   * @return Map of parameters (Name -> description).
   */
  @Override
  public Map<String, String> getParameters() {
    Map<String, String> parameters = super.getParameters();
    parameters.put(
        "reason", GT._("An explanation of the problem"));
    return parameters;
  }
}