/*
* WPCleaner: A tool to help on Wikipedia maintenance tasks.
* Copyright (C) 2013 Nicolas Vervelle
*
* See README.txt file for licensing information.
*/
package org.wikipediacleaner.api.check.algorithm;
import java.util.Collection;
import java.util.List;
import org.wikipediacleaner.api.check.CheckErrorResult;
import org.wikipediacleaner.api.check.CheckErrorResult.ErrorLevel;
import org.wikipediacleaner.api.constants.WPCConfigurationStringList;
import org.wikipediacleaner.api.data.Namespace;
import org.wikipediacleaner.api.data.PageAnalysis;
import org.wikipediacleaner.api.data.PageElementComment;
import org.wikipediacleaner.api.data.PageElementExternalLink;
import org.wikipediacleaner.api.data.PageElementFunction;
import org.wikipediacleaner.api.data.PageElementPMID;
import org.wikipediacleaner.api.data.PageElementTag;
import org.wikipediacleaner.api.data.PageElementTemplate;
import org.wikipediacleaner.api.data.PageElementTemplate.Parameter;
/**
* Algorithm for analyzing error 102 of check wikipedia project.
* Error 102: PMID wrong syntax
*/
public class CheckErrorAlgorithm102 extends CheckErrorAlgorithmBase {

  /** List of strings that could be before a PMID in {@code <nowiki>}. */
  private static final String[] EXTEND_BEFORE_NOWIKI = {
    "<nowiki>",
    "<small>",
    "(",
  };

  /** List of strings that could be after a PMID in {@code <nowiki>}. */
  private static final String[] EXTEND_AFTER_NOWIKI = {
    "</nowiki>",
    "</small>",
    ")",
  };

  /** Characters that are skipped backwards in front of a PMID located in an external link text. */
  private static final String LINK_SEPARATORS_BEFORE = " ,;.(";

  /**
   * Create the algorithm for error 102 (PMID wrong syntax).
   */
  public CheckErrorAlgorithm102() {
    super("PMID wrong syntax");
  }

  /**
   * Analyze a page to check if errors are present.
   *
   * Three situations are reported, in this order:
   * PMIDs that are valid but not written correctly,
   * PMIDs written inside the text of a bracketed external link,
   * and PMIDs written inside {@code <nowiki>} tags.
   *
   * @param analysis Page analysis.
   * @param errors Errors found in the page (if null, the method returns as soon as one error is detected).
   * @param onlyAutomatic True if analysis could be restricted to errors automatically fixed
   *                      (not consulted by this implementation).
   * @return Flag indicating if the error was found.
   */
  @Override
  public boolean analyze(
      PageAnalysis analysis,
      Collection<CheckErrorResult> errors, boolean onlyAutomatic) {
    if (analysis == null) {
      return false;
    }

    // Analyze each PMID
    boolean result = false;
    List<PageElementPMID> pmids = analysis.getPMIDs();
    if (pmids != null) {
      for (PageElementPMID pmid : pmids) {
        if (isReportablePmidSyntaxError(analysis, pmid)) {
          if (errors == null) {
            return true;
          }
          result = true;
          reportPmidSyntaxError(analysis, errors, pmid);
        } else if (!pmid.isTemplateParameter()) {
          // Only PMIDs that were not reported above are checked against external links
          PageElementExternalLink link = findLinkWithPmidInText(analysis, pmid);
          if (link != null) {
            if (errors == null) {
              return true;
            }
            result = true;
            errors.add(buildPmidInLinkResult(analysis, pmid, link));
          }
        }
      }
    }

    // Report also PMID inside <nowiki> tags
    if (analyzePmidInNowikiTags(analysis, errors)) {
      result = true;
    }
    return result;
  }

  /**
   * Decide if a PMID has to be reported for its syntax.
   *
   * A PMID is a candidate when it is valid but not correct.
   * In the template namespace, a candidate that is a template parameter is ignored
   * when a parser function begins inside the same parameter of the same template.
   *
   * @param analysis Page analysis.
   * @param pmid PMID.
   * @return True if the PMID should be reported.
   */
  private boolean isReportablePmidSyntaxError(
      PageAnalysis analysis, PageElementPMID pmid) {
    if (pmid.isCorrect() || !pmid.isValid()) {
      return false;
    }

    // Exclude parameters in templates
    if (!pmid.isTemplateParameter() ||
        !analysis.isInNamespace(Namespace.TEMPLATE)) {
      return true;
    }
    PageElementTemplate template = analysis.isInTemplate(pmid.getBeginIndex());
    if (template == null) {
      return true;
    }
    Parameter param = template.getParameterAtIndex(pmid.getBeginIndex());
    if (param == null) {
      return true;
    }
    List<PageElementFunction> functions = analysis.getFunctions();
    if (functions == null) {
      return true;
    }
    for (PageElementFunction function : functions) {
      int functionIndex = function.getBeginIndex();
      // Identity comparison: the function must start in the very same template parameter
      if ((template == analysis.isInTemplate(functionIndex)) &&
          (param == template.getParameterAtIndex(functionIndex))) {
        return false;
      }
    }
    return true;
  }

  /**
   * Report a PMID with a wrong syntax, suggesting each correct form
   * that differs from the current text.
   *
   * @param analysis Page analysis.
   * @param errors Errors found in the page (not null).
   * @param pmid PMID.
   */
  private void reportPmidSyntaxError(
      PageAnalysis analysis,
      Collection<CheckErrorResult> errors,
      PageElementPMID pmid) {
    CheckErrorResult errorResult = createCheckErrorResult(analysis, pmid, false);
    errors.add(errorResult);
    List<String> replacements = pmid.getCorrectPMID();
    if ((replacements == null) || replacements.isEmpty()) {
      return;
    }
    String currentText = analysis.getContents().substring(
        pmid.getBeginIndex(), pmid.getEndIndex());
    for (String replacement : replacements) {
      if (!replacement.equals(currentText)) {
        errorResult.addReplacement(replacement);
      }
    }
  }

  /**
   * Find the external link whose text contains the PMID.
   *
   * @param analysis Page analysis.
   * @param pmid PMID.
   * @return External link with square brackets and a text, when the PMID
   *         begins at or after the beginning of that text; null otherwise.
   */
  private PageElementExternalLink findLinkWithPmidInText(
      PageAnalysis analysis, PageElementPMID pmid) {
    PageElementExternalLink link = analysis.isInExternalLink(pmid.getBeginIndex());
    if ((link != null) && link.hasSquare() &&
        (pmid.getBeginIndex() >= link.getBeginIndex() + link.getTextOffset()) &&
        (link.getText() != null)) {
      return link;
    }
    return null;
  }

  /**
   * Build the error for a PMID written inside the text of an external link.
   *
   * The error covers the whole link. The suggested replacements either move the PMID
   * after the link, or replace the whole link by the PMID when the PMID
   * (with its surrounding punctuation) makes up the link text up to its end.
   *
   * @param analysis Page analysis.
   * @param pmid PMID.
   * @param link External link containing the PMID in its text.
   * @return Error result.
   */
  private CheckErrorResult buildPmidInLinkResult(
      PageAnalysis analysis,
      PageElementPMID pmid,
      PageElementExternalLink link) {
    String contents = analysis.getContents();
    int linkBegin = link.getBeginIndex();
    int linkEnd = link.getEndIndex();
    int pmidBegin = pmid.getBeginIndex();
    int pmidEnd = pmid.getEndIndex();
    CheckErrorResult errorResult = createCheckErrorResult(analysis, linkBegin, linkEnd);

    // Extend the area to the punctuation around the PMID
    int beginIndex = pmidBegin;
    while ((beginIndex > 0) &&
           (LINK_SEPARATORS_BEFORE.indexOf(contents.charAt(beginIndex - 1)) >= 0)) {
      beginIndex--;
    }
    int endIndex = pmidEnd;
    while ((endIndex < linkEnd) && (contents.charAt(endIndex) == ')')) {
      endIndex++;
    }

    // May be null: the suggestions based on it are then simply skipped
    List<String> replacements = pmid.getCorrectPMID();
    String pmidText = contents.substring(pmidBegin, pmidEnd);

    if (beginIndex > linkBegin + link.getTextOffset()) {
      // Some text remains in the link before the PMID: move the PMID after the link
      String separator = contents.substring(beginIndex, pmidBegin);
      String closing = contents.substring(pmidEnd, endIndex);
      String replacementPrefix =
          contents.substring(linkBegin, beginIndex) +
          contents.substring(endIndex, linkEnd) +
          separator;
      String textPrefix =
          contents.substring(linkBegin, linkBegin + 7) +
          "...]" +
          separator;
      if (replacements != null) {
        for (String replacement : replacements) {
          errorResult.addReplacement(
              replacementPrefix + replacement + closing,
              textPrefix + replacement + closing);
        }
      }
      errorResult.addReplacement(
          replacementPrefix + pmidText,
          textPrefix + pmidText);

      if (endIndex < linkEnd) {
        // Alternative: close the link before the PMID and keep everything
        // up to the last character of the link (excluded) after it
        replacementPrefix =
            contents.substring(linkBegin, beginIndex) +
            "]" +
            separator;
        String trailing = contents.substring(pmidEnd, linkEnd - 1);
        if (replacements != null) {
          for (String replacement : replacements) {
            errorResult.addReplacement(
                replacementPrefix + replacement + trailing,
                textPrefix + replacement + trailing);
          }
        }
        String pmidAndTrailing = contents.substring(pmidBegin, linkEnd - 1);
        errorResult.addReplacement(
            replacementPrefix + pmidAndTrailing,
            textPrefix + pmidAndTrailing);
      }
    } else if (endIndex >= linkEnd - 1) {
      // The PMID fills the link text: replace the whole link by the PMID
      if (replacements != null) {
        for (String replacement : replacements) {
          errorResult.addReplacement(replacement);
        }
      }
      errorResult.addReplacement(pmidText);
    }
    return errorResult;
  }

  /**
   * Look for PMIDs written inside {@code <nowiki>} tags.
   *
   * A PMID is recognized when {@link PageElementPMID#PMID_PREFIX} is followed by
   * at least one character of {@link PageElementPMID#EXTRA_CHARACTERS} and then
   * at least one character of {@link PageElementPMID#POSSIBLE_CHARACTERS}.
   *
   * @param analysis Page analysis.
   * @param errors Errors found in the page (if null, the method returns as soon as one error is detected).
   * @return Flag indicating if the error was found.
   */
  private boolean analyzePmidInNowikiTags(
      PageAnalysis analysis,
      Collection<CheckErrorResult> errors) {
    List<PageElementTag> nowikiTags = analysis.getCompleteTags(PageElementTag.TAG_WIKI_NOWIKI);
    if (nowikiTags == null) {
      return false;
    }

    boolean result = false;
    String contents = analysis.getContents();
    int prefixLength = PageElementPMID.PMID_PREFIX.length();
    for (PageElementTag nowikiTag : nowikiTags) {
      if (nowikiTag.isFullTag() || !nowikiTag.isComplete()) {
        continue;
      }
      String nowikiContent = contents.substring(
          nowikiTag.getValueBeginIndex(), nowikiTag.getValueEndIndex());
      int index = 0;
      while (index < nowikiContent.length()) {
        if (!nowikiContent.startsWith(PageElementPMID.PMID_PREFIX, index)) {
          index++;
          continue;
        }

        // Separators between the prefix and the identifier
        int tmpIndex = index + prefixLength;
        boolean hasSeparator = false;
        while ((tmpIndex < nowikiContent.length()) &&
               (PageElementPMID.EXTRA_CHARACTERS.indexOf(nowikiContent.charAt(tmpIndex)) >= 0)) {
          hasSeparator = true;
          tmpIndex++;
        }

        // Identifier: groups of possible characters, optionally separated by extra characters.
        // Trailing extra characters that are not followed by a possible character are left out.
        boolean hasCharacter = false;
        int indexCharacter = tmpIndex;
        boolean shouldContinue = true;
        while (shouldContinue) {
          int tmpIndex2 = tmpIndex;
          shouldContinue = false;
          while ((tmpIndex2 < nowikiContent.length()) &&
                 (PageElementPMID.EXTRA_CHARACTERS.indexOf(nowikiContent.charAt(tmpIndex2)) >= 0)) {
            tmpIndex2++;
          }
          while ((tmpIndex2 < nowikiContent.length()) &&
                 (PageElementPMID.POSSIBLE_CHARACTERS.indexOf(nowikiContent.charAt(tmpIndex2)) >= 0)) {
            hasCharacter = true;
            shouldContinue = true;
            tmpIndex2++;
          }
          if (shouldContinue) {
            tmpIndex = tmpIndex2;
          }
        }

        if (hasSeparator && hasCharacter) {
          if (errors == null) {
            return true;
          }
          result = true;
          errors.add(buildPmidInNowikiResult(
              analysis, nowikiTag, index, indexCharacter, tmpIndex));
          index = tmpIndex;
        } else {
          index += prefixLength;
        }
      }
    }
    return result;
  }

  /**
   * Build the error for a PMID written inside a {@code <nowiki>} tag.
   *
   * The reported area is extended over the strings listed in
   * {@link #EXTEND_BEFORE_NOWIKI} and {@link #EXTEND_AFTER_NOWIKI}.
   * Replacements are suggested only when the extended area covers the complete tag.
   *
   * @param analysis Page analysis.
   * @param nowikiTag Tag containing the PMID.
   * @param index Index of the PMID prefix, relative to the beginning of the tag value.
   * @param indexCharacter Index of the PMID value, relative to the beginning of the tag value.
   * @param tmpIndex Index of the end of the PMID, relative to the beginning of the tag value.
   * @return Error result.
   */
  private CheckErrorResult buildPmidInNowikiResult(
      PageAnalysis analysis, PageElementTag nowikiTag,
      int index, int indexCharacter, int tmpIndex) {
    String contents = analysis.getContents();
    int valueBegin = nowikiTag.getValueBeginIndex();

    // Try to extend area
    int beginIndex = valueBegin + index;
    boolean extensionFound;
    do {
      extensionFound = false;
      for (String before : EXTEND_BEFORE_NOWIKI) {
        if ((beginIndex >= before.length()) &&
            (contents.startsWith(before, beginIndex - before.length()))) {
          extensionFound = true;
          beginIndex -= before.length();
        }
      }
    } while (extensionFound);
    int endIndex = valueBegin + tmpIndex;
    do {
      extensionFound = false;
      for (String after : EXTEND_AFTER_NOWIKI) {
        if ((endIndex < contents.length()) &&
            (contents.startsWith(after, endIndex))) {
          extensionFound = true;
          endIndex += after.length();
        }
      }
    } while (extensionFound);

    // Report error
    CheckErrorResult errorResult = createCheckErrorResult(
        analysis, beginIndex, endIndex);
    if ((beginIndex > nowikiTag.getCompleteBeginIndex()) ||
        (endIndex < nowikiTag.getCompleteEndIndex())) {
      return errorResult;
    }

    // First suggestion: the PMID text itself, without the surrounding tag
    errorResult.addReplacement(contents.substring(
        valueBegin + index, valueBegin + tmpIndex));

    // Other suggestions: configured templates, each entry being read as
    // { template name, comma separated parameter names, suggested flag }
    List<String[]> pmidTemplates = analysis.getWPCConfiguration().getStringArrayList(
        WPCConfigurationStringList.PMID_TEMPLATES);
    if (pmidTemplates == null) {
      return errorResult;
    }
    String pmidValue = contents.substring(
        valueBegin + indexCharacter, valueBegin + tmpIndex);
    for (String[] pmidTemplate : pmidTemplates) {
      if (pmidTemplate.length <= 2) {
        continue;
      }
      String templateName = pmidTemplate[0];
      String[] params = pmidTemplate[1].split(",");
      boolean suggested = Boolean.parseBoolean(pmidTemplate[2]);
      if ((params.length > 0) && suggested) {
        StringBuilder replacement = new StringBuilder();
        replacement.append("{{");
        replacement.append(templateName);
        replacement.append("|");
        // A parameter named "1" is written as an unnamed parameter
        if (!"1".equals(params[0])) {
          replacement.append(params[0]);
          replacement.append("=");
        }
        replacement.append(pmidValue);
        replacement.append("}}");
        errorResult.addReplacement(replacement.toString());
      }
    }
    return errorResult;
  }

  /**
   * Create the error result for a PMID.
   *
   * The level is ERROR when the PMID is valid and no help is requested for it,
   * WARNING otherwise. When checkForComment is set, the level is also WARNING
   * if the first character after the PMID and the spaces following it
   * is a '&lt;' located in a comment.
   *
   * @param analysis Page analysis.
   * @param pmid PMID.
   * @param checkForComment True to check for a comment after the PMID.
   * @return Error result.
   */
  protected CheckErrorResult createCheckErrorResult(
      PageAnalysis analysis, PageElementPMID pmid,
      boolean checkForComment) {
    ErrorLevel level = (pmid.isValid() && !pmid.helpRequested()) ?
        ErrorLevel.ERROR : ErrorLevel.WARNING;
    if (checkForComment) {
      String contents = analysis.getContents();
      int index = pmid.getEndIndex();
      while ((index < contents.length()) && (contents.charAt(index) == ' ')) {
        index++;
      }
      if ((index < contents.length()) && (contents.charAt(index) == '<')) {
        PageElementComment comment = analysis.isInComment(index);
        if (comment != null) {
          level = ErrorLevel.WARNING;
        }
      }
    }
    return createCheckErrorResult(
        analysis, pmid.getBeginIndex(), pmid.getEndIndex(), level);
  }
}