/*
* WPCleaner: A tool to help on Wikipedia maintenance tasks.
* Copyright (C) 2013 Nicolas Vervelle
*
* See README.txt file for licensing information.
*/
package org.wikipediacleaner.api.check.algorithm;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.wikipediacleaner.api.check.CheckErrorResult;
import org.wikipediacleaner.api.data.PageAnalysis;
import org.wikipediacleaner.api.data.PageElementTag;
import org.wikipediacleaner.api.data.PageElementTag.Parameter;
/**
 * Algorithm for analyzing error 104 of check wikipedia project.
 * Error 104: Unbalanced quotes in ref name
 * <p>
 * The algorithm scans the page text for every {@code '<'} that starts something
 * looking like {@code <ref name...}, and that is either not recognized as a tag
 * by the page analysis or recognized as a ref tag whose {@code name} parameter
 * has unbalanced quotes. When possible, a normalized replacement of the form
 * {@code <ref name="value">} (or {@code <ref name="value" />}) is suggested.
 */
public class CheckErrorAlgorithm104 extends CheckErrorAlgorithmBase {

  /** Characters ending the scan for the end of the reported area. */
  private static final String AREA_DELIMITERS = "\n<>";

  /** Characters skipped between the equal sign and the beginning of the name. */
  private static final String BEFORE_NAME = " \"'”";

  /** Characters accepted inside a name, in addition to letters and digits. */
  private static final String NAME_PUNCTUATION = " -_,.";

  /** Characters skipped just after the name. */
  private static final String AFTER_NAME = " \"'”»";

  /** Characters skipped between an optional "/" and the closing ">". */
  private static final String BEFORE_TAG_END = " \"";

  /** Characters trimmed from the end of a value when working backward from ">". */
  private static final String VALUE_TRAILING = " \u00A0/\"″“”„’»";

  /** Characters stopping the backward scan for the beginning of a value. */
  private static final String VALUE_BOUNDARY = "=\"\n";

  /** Characters trimmed from the beginning of a value, including the equal sign. */
  private static final String VALUE_LEADING = "= \u00A0\"″“”„‘’»";

  public CheckErrorAlgorithm104() {
    super("Unbalanced quotes in ref name");
  }

  /**
   * Analyze a page to check if errors are present.
   *
   * @param analysis Page analysis.
   * @param errors Errors found in the page. If <code>null</code>, the method
   *               returns <code>true</code> as soon as a first error is detected.
   * @param onlyAutomatic True if analysis could be restricted to errors automatically fixed
   *                      (not used by this algorithm).
   * @return Flag indicating if the error was found.
   */
  @Override
  public boolean analyze(
      PageAnalysis analysis,
      Collection<CheckErrorResult> errors, boolean onlyAutomatic) {
    if (analysis == null) {
      return false;
    }

    // Check every "<"
    boolean result = false;
    String contents = analysis.getContents();
    int maxLen = contents.length();
    for (int currentIndex = 0; currentIndex < maxLen; currentIndex++) {
      if (contents.charAt(currentIndex) != '<') {
        continue;
      }
      if (!isSuspiciousTagStart(analysis, currentIndex)) {
        continue;
      }

      // Ensure that it's in the form "<ref name"
      int endIndex = findEndOfRefName(contents, currentIndex);
      if (endIndex < 0) {
        continue;
      }

      // Report error
      if (errors == null) {
        return true;
      }
      result = true;
      int fullEnd = computeAreaEnd(contents, endIndex);
      CheckErrorResult errorResult = createCheckErrorResult(
          analysis, currentIndex, fullEnd);
      List<String> replacements = suggestReplacements(
          contents, currentIndex, endIndex, fullEnd);
      for (String replacement : replacements) {
        errorResult.addReplacement(replacement);
      }
      errors.add(errorResult);
    }

    return result;
  }

  /**
   * Decide if a "<" deserves to be checked against the "<ref name" pattern.
   *
   * @param analysis Page analysis.
   * @param index Index of the "<" in the page contents.
   * @return True if the "<" is not the beginning of a correctly detected tag
   *         (or is a ref tag with unbalanced quotes in its name parameter),
   *         and is neither in a comment nor in one of the ignored tags.
   */
  private boolean isSuspiciousTagStart(PageAnalysis analysis, int index) {

    // Ignore tags correctly detected
    PageElementTag tag = analysis.isInTag(index);
    if ((tag != null) && (tag.getBeginIndex() == index)) {
      if (!PageElementTag.TAG_WIKI_REF.equalsIgnoreCase(tag.getName())) {
        return false;
      }
      Parameter paramName = tag.getParameter("name");
      if ((paramName == null) || !paramName.hasUnbalancedQuotes()) {
        return false;
      }
    }

    // Ignore comments
    if (analysis.isInComment(index) != null) {
      return false;
    }

    // Ignore some tags
    if ((analysis.getSurroundingTag(PageElementTag.TAG_HTML_CODE, index) != null) ||
        (analysis.getSurroundingTag(PageElementTag.TAG_WIKI_MATH, index) != null) ||
        (analysis.getSurroundingTag(PageElementTag.TAG_WIKI_MATH_CHEM, index) != null) ||
        (analysis.getSurroundingTag(PageElementTag.TAG_WIKI_NOWIKI, index) != null) ||
        (analysis.getSurroundingTag(PageElementTag.TAG_WIKI_PRE, index) != null) ||
        (analysis.getSurroundingTag(PageElementTag.TAG_WIKI_SCORE, index) != null) ||
        (analysis.getSurroundingTag(PageElementTag.TAG_WIKI_SOURCE, index) != null) ||
        (analysis.getSurroundingTag(PageElementTag.TAG_WIKI_SYNTAXHIGHLIGHT, index) != null)) {
      return false;
    }
    return true;
  }

  /**
   * Check that the text starting at a "<" is in the form "<ref name":
   * optional whitespace, "ref", at least one whitespace, "name".
   *
   * @param contents Page contents.
   * @param tagStart Index of the "<".
   * @return Index just after "name" (it can be equal to the length of the contents),
   *         or -1 if the text doesn't match.
   */
  private static int findEndOfRefName(String contents, int tagStart) {
    int maxLen = contents.length();
    int index = tagStart + 1;
    while ((index < maxLen) && Character.isWhitespace(contents.charAt(index))) {
      index++;
    }
    // startsWith() returns false when the offset is at or after the end of the contents
    if (!contents.startsWith("ref", index)) {
      return -1;
    }
    index += 3;
    if ((index >= maxLen) || !Character.isWhitespace(contents.charAt(index))) {
      return -1;
    }
    while ((index < maxLen) && Character.isWhitespace(contents.charAt(index))) {
      index++;
    }
    if (!contents.startsWith("name", index)) {
      return -1;
    }
    return index + 4;
  }

  /**
   * Compute the end of the area to report.
   *
   * @param contents Page contents.
   * @param endIndex Index just after "name".
   * @return Index just after the next ">" if it comes first,
   *         index of the next "<" if it comes first,
   *         or <code>endIndex</code> if a line break or the end of the contents comes first.
   */
  private static int computeAreaEnd(String contents, int endIndex) {
    int maxLen = contents.length();
    int fullEnd = endIndex;
    // The character before endIndex is always the last letter of "name",
    // so the scan can start unconditionally.
    while ((fullEnd < maxLen) &&
           (AREA_DELIMITERS.indexOf(contents.charAt(fullEnd)) < 0)) {
      fullEnd++;
    }
    if (fullEnd >= maxLen) {
      return endIndex;
    }
    char delimiter = contents.charAt(fullEnd);
    if (delimiter == '>') {
      return fullEnd + 1;
    }
    if (delimiter != '<') {
      return endIndex;
    }
    return fullEnd;
  }

  /**
   * Build the suggested replacements for a reported area.
   *
   * @param contents Page contents.
   * @param beginIndex Index of the "<".
   * @param endIndex Index just after "name".
   * @param fullEnd End of the reported area.
   * @return Suggested replacements (possibly empty, never <code>null</code>).
   */
  private static List<String> suggestReplacements(
      String contents, int beginIndex, int endIndex, int fullEnd) {
    List<String> replacements = new ArrayList<>();

    // Check if there's an equal sign after "name"
    int equalSign = endIndex;
    while ((equalSign < fullEnd) && (contents.charAt(equalSign) == ' ')) {
      equalSign++;
    }
    // The bounds check is required: when the contents end exactly with "<ref name",
    // equalSign is equal to the length of the contents and charAt() would throw.
    if ((equalSign >= contents.length()) || (contents.charAt(equalSign) != '=')) {
      equalSign = -1;
    }

    // Case like <ref name=a name>, <ref name=>, <ref name="a name>, <ref name=a name">...
    if (contents.charAt(fullEnd - 1) != '>') {
      return replacements;
    }
    String prefix = contents.substring(beginIndex, endIndex);
    int tagEnd = fullEnd - 1;
    String replacement = suggestFromSimpleName(
        contents, prefix, (equalSign > 0) ? equalSign + 1 : endIndex, tagEnd);
    if ((replacement == null) && (equalSign > 0)) {
      replacement = suggestFromTrailingValue(contents, prefix, equalSign, tagEnd);
    }
    if (replacement != null) {
      replacements.add(replacement);
    }
    return replacements;
  }

  /**
   * Try to read forward a simple name (letters, digits and a few punctuation marks),
   * optionally surrounded by quotes and followed by "/", up to the closing ">".
   *
   * @param contents Page contents.
   * @param prefix Text from "<" to the end of "name".
   * @param fromIndex Index where to start reading.
   * @param tagEnd Index of the closing ">".
   * @return Replacement, or <code>null</code> if something unexpected is found before ">".
   */
  private static String suggestFromSimpleName(
      String contents, String prefix, int fromIndex, int tagEnd) {
    int index = fromIndex;
    while ((index < tagEnd) && (contents.charAt(index) == '=')) {
      index++;
    }
    while ((index < tagEnd) && (BEFORE_NAME.indexOf(contents.charAt(index)) >= 0)) {
      index++;
    }
    int startName = index;
    while ((index < tagEnd) && isNameCharacter(contents.charAt(index))) {
      index++;
    }
    int endName = index;
    while ((endName > startName) && (contents.charAt(endName - 1) == ' ')) {
      endName--;
    }
    while ((index < tagEnd) && (AFTER_NAME.indexOf(contents.charAt(index)) >= 0)) {
      index++;
    }
    boolean closing = false;
    while ((index < tagEnd) && (contents.charAt(index) == '/')) {
      index++;
      closing = true;
    }
    while ((index < tagEnd) && (BEFORE_TAG_END.indexOf(contents.charAt(index)) >= 0)) {
      index++;
    }
    if (index != tagEnd) {
      return null;
    }
    if (endName <= startName) {
      // No name at all: suggest a ref tag without name
      return "<ref>";
    }
    return prefix +
        "=\"" +
        contents.substring(startName, endName) +
        "\"" +
        (closing ? " /" : "") +
        ">";
  }

  /**
   * Try to read backward from the closing ">" a value that extends
   * up to the equal sign following "name".
   *
   * @param contents Page contents.
   * @param prefix Text from "<" to the end of "name".
   * @param equalSign Index of the equal sign following "name".
   * @param tagEnd Index of the closing ">".
   * @return Replacement, or <code>null</code> if the value contains
   *         an other equal sign, a double quote or a line break.
   */
  private static String suggestFromTrailingValue(
      String contents, String prefix, int equalSign, int tagEnd) {
    int index = tagEnd;
    while ((index > equalSign) && (contents.charAt(index - 1) == ' ')) {
      index--;
    }
    boolean closing = false;
    if ((index > equalSign) && (contents.charAt(index - 1) == '/')) {
      closing = true;
      index--;
    }
    while ((index > equalSign) &&
           (VALUE_TRAILING.indexOf(contents.charAt(index - 1)) >= 0)) {
      index--;
    }
    int endValue = index;
    while ((index > equalSign) &&
           (VALUE_BOUNDARY.indexOf(contents.charAt(index - 1)) < 0)) {
      index--;
    }
    if (index != equalSign + 1) {
      return null;
    }
    int startValue = equalSign;
    while ((startValue < endValue) &&
           (VALUE_LEADING.indexOf(contents.charAt(startValue)) >= 0)) {
      startValue++;
    }
    StringBuilder replacement = new StringBuilder();
    replacement.append(prefix);
    replacement.append("=\"");
    replacement.append(contents.substring(startValue, endValue));
    replacement.append("\"");
    if (closing) {
      replacement.append(" /");
    }
    replacement.append(">");
    return replacement.toString();
  }

  /**
   * @param character Character to test.
   * @return True if the character is accepted inside a simple name.
   */
  private static boolean isNameCharacter(char character) {
    return Character.isLetter(character) ||
        Character.isDigit(character) ||
        (NAME_PUNCTUATION.indexOf(character) >= 0);
  }
}