/*
* WPCleaner: A tool to help on Wikipedia maintenance tasks.
* Copyright (C) 2013 Nicolas Vervelle
*
* See README.txt file for licensing information.
*/
package org.wikipediacleaner.api.check.algorithm;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import org.wikipediacleaner.api.API;
import org.wikipediacleaner.api.APIException;
import org.wikipediacleaner.api.APIFactory;
import org.wikipediacleaner.api.check.CheckErrorResult;
import org.wikipediacleaner.api.constants.EnumWikipedia;
import org.wikipediacleaner.api.data.Namespace;
import org.wikipediacleaner.api.data.Page;
import org.wikipediacleaner.api.data.PageAnalysis;
import org.wikipediacleaner.i18n.GT;
import org.wikipediacleaner.utils.Configuration;
import org.wikipediacleaner.utils.ConfigurationValueInteger;
/**
 * Algorithm for analyzing error 520 of check wikipedia project.
 * Error 520: Weird characters (pawns, snowmen) in main namespace
 */
public class CheckErrorAlgorithm520 extends CheckErrorAlgorithmBase {

  /**
   * Weird characters to look for (pawn, snowman).
   */
  private static final String WEIRD_CHARACTERS = "♙☃";

  /**
   * Name of the configuration property holding the abuse filter identifier.
   */
  private static final String PROPERTY_ABUSE_FILTER = "abuse_filter";

  public CheckErrorAlgorithm520() {
    super("Weird characters");
  }

  /**
   * Analyze a page to check if errors are present.
   * Only pages in the main namespace are analyzed; each weird character found
   * is reported with a proposed replacement by an empty string.
   *
   * @param analysis Page analysis.
   * @param errors Errors found in the page (if null, the analysis stops at the first error).
   * @param onlyAutomatic True if analysis could be restricted to errors automatically fixed.
   * @return Flag indicating if the error was found.
   */
  @Override
  public boolean analyze(
      PageAnalysis analysis,
      Collection<CheckErrorResult> errors, boolean onlyAutomatic) {
    if ((analysis == null) || (analysis.getPage() == null)) {
      return false;
    }
    Integer ns = analysis.getPage().getNamespace();
    if ((ns == null) || (ns.intValue() != Namespace.MAIN)) {
      return false;
    }

    // Nothing to search in if the page has no contents
    String contents = analysis.getContents();
    if (contents == null) {
      return false;
    }

    // Search weird characters
    boolean result = false;
    for (int index = 0; index < contents.length(); index++) {
      if (WEIRD_CHARACTERS.indexOf(contents.charAt(index)) < 0) {
        continue;
      }
      if (errors == null) {
        // Caller only wants to know if the error is present
        return true;
      }
      result = true;
      CheckErrorResult errorResult = createCheckErrorResult(
          analysis, index, index + 1);
      errorResult.addReplacement("");
      errors.add(errorResult);
    }
    return result;
  }

  /**
   * @return True if the error has a special list of pages
   *         (i.e. a valid abuse filter identifier is configured).
   */
  @Override
  public boolean hasSpecialList() {
    return (getAbuseFilter() != null);
  }

  /**
   * @return Abuse filter identifier, or null if the property is missing,
   *         blank or not a valid integer.
   */
  private Integer getAbuseFilter() {
    String abuseFilter = getSpecificProperty(PROPERTY_ABUSE_FILTER, true, true, false);
    if (abuseFilter == null) {
      return null;
    }
    // Parse the trimmed value: Integer.valueOf() rejects surrounding whitespace,
    // which would otherwise silently disable a correctly configured filter.
    String trimmed = abuseFilter.trim();
    if (trimmed.length() == 0) {
      return null;
    }
    try {
      return Integer.valueOf(trimmed);
    } catch (NumberFormatException e) {
      // Not a number: behave as if no abuse filter was configured
      return null;
    }
  }

  /**
   * Retrieve the list of pages in error, using the abuse log of the configured abuse filter.
   *
   * @param wiki Wiki.
   * @param limit Maximum number of pages to retrieve. It is not applied by this implementation:
   *              the abuse log request is parameterized by the MAX_DAYS_ABUSE_LOG
   *              configuration value instead.
   * @return List of pages in error, or null if no abuse filter is configured
   *         or if the abuse log could not be retrieved.
   */
  @Override
  public List<Page> getSpecialList(EnumWikipedia wiki, int limit) {
    Integer abuseFilter = getAbuseFilter();
    if (abuseFilter == null) {
      return null;
    }
    API api = APIFactory.getAPI();
    Configuration config = Configuration.getConfiguration();
    int maxDays = config.getInt(wiki, ConfigurationValueInteger.MAX_DAYS_ABUSE_LOG);
    try {
      return api.retrieveAbuseLog(wiki, abuseFilter, maxDays);
    } catch (APIException e) {
      // Best effort: a failed retrieval is reported to the caller as an absent list
      return null;
    }
  }

  /**
   * Return the parameters used to configure the algorithm.
   *
   * @return Map of parameters (Name -> description).
   */
  @Override
  public Map<String, String> getParameters() {
    Map<String, String> parameters = super.getParameters();
    parameters.put(
        PROPERTY_ABUSE_FILTER,
        GT._("An identifier of an abuse filter that is triggered by weird characters."));
    return parameters;
  }
}