/* * WPCleaner: A tool to help on Wikipedia maintenance tasks. * Copyright (C) 2013 Nicolas Vervelle * * See README.txt file for licensing information. */ package org.wikipediacleaner.api.check; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.regex.Pattern; import org.wikipediacleaner.api.check.algorithm.CheckErrorAlgorithm; import org.wikipediacleaner.api.check.algorithm.CheckErrorAlgorithms; import org.wikipediacleaner.api.constants.CWConfigurationError; import org.wikipediacleaner.api.constants.EnumWikipedia; import org.wikipediacleaner.api.data.DataManager; import org.wikipediacleaner.api.data.Page; import org.wikipediacleaner.api.data.PageAnalysis; import org.wikipediacleaner.i18n.GT; import org.wikipediacleaner.utils.Performance; /** * Managing errors defined in the check wikipedia project. */ public class CheckError { private static boolean traceTime = false; /** * Analyze a page to find error types. * * @param algorithms Possible algorithms. * @param pageAnalysis Page analysis. * @param onlyAutomatic True if analysis could be restricted to errors automatically fixed. * @return Errors found in the page. */ public static List<CheckErrorPage> analyzeErrors( Collection<CheckErrorAlgorithm> algorithms, PageAnalysis pageAnalysis, boolean onlyAutomatic) { Performance perf = new Performance("CheckError.analyzeErrors"); if (traceTime) { perf.printStart(); } List<CheckErrorPage> errorsFound = new ArrayList<CheckErrorPage>(); if ((algorithms != null) && (pageAnalysis != null) && (pageAnalysis.getContents() != null)) { for (CheckErrorAlgorithm algorithm : algorithms) { if ((algorithm != null) && (algorithm.isAvailable()) && (CWConfigurationError.isPriorityActive(algorithm.getPriority()))) { List<CheckErrorResult> results = new ArrayList<CheckErrorResult>(); boolean errorFound = false; int errorNumber = algorithm.getErrorNumber(); PageAnalysis.Result result = pageAnalysis.getCheckWikiErrors(errorNumber); if (result != null) { errorFound = result.getErrors(results); } else { errorFound = algorithm.analyze(pageAnalysis, results, onlyAutomatic); pageAnalysis.setCheckWikiErrors(errorNumber, errorFound, results); } if (errorFound) { CheckErrorPage errorPage = new CheckErrorPage(pageAnalysis.getPage(), algorithm); errorPage.setResults(true, results); errorsFound.add(errorPage); } if (traceTime) { String message = "Error n°" + algorithm.getErrorNumber() + ", " + errorFound + ", " + results.size() + " occurrences"; perf.printStep(message); } } } } if (traceTime) { perf.printEnd(); } return errorsFound; } /** * Analyze a page to find errors of a given type. * * @param algorithm Algorithm. * @param pageAnalysis Page analysis. * @return Error page. */ public static CheckErrorPage analyzeError( CheckErrorAlgorithm algorithm, PageAnalysis pageAnalysis) { if ((algorithm == null) || (pageAnalysis == null)) { return null; } Performance perf = new Performance("CheckError.analyzeError"); CheckErrorPage errorPage = new CheckErrorPage(pageAnalysis.getPage(), algorithm); boolean errorFound = false; List<CheckErrorResult> errorsFound = new ArrayList<CheckErrorResult>(); int errorNumber = algorithm.getErrorNumber(); PageAnalysis.Result result = pageAnalysis.getCheckWikiErrors(errorNumber); if (result != null) { errorFound = result.getErrors(errorsFound); } else { errorFound = algorithm.analyze(pageAnalysis, errorsFound, false); pageAnalysis.setCheckWikiErrors(errorNumber, errorFound, errorsFound); } errorPage.setResults(errorFound, errorsFound); if (traceTime) { perf.printStep("Error n°" + algorithm.getErrorNumber()); } return errorPage; } /** * @param initialErrors List of initial errors. * @param contents Current contents. * @param shouldCheckSpelling True if spelling should be checked. * @return Information about errors fixed. */ public static List<Progress> computeErrorsFixed( List<CheckErrorPage> initialErrors, String contents, boolean shouldCheckSpelling) { final List<Progress> errorsFixed = new ArrayList<>(); PageAnalysis analysis = null; if (initialErrors != null) { for (CheckErrorPage initialError : initialErrors) { if (analysis == null) { analysis = initialError.getPage().getAnalysis(contents, true); analysis.shouldCheckSpelling(shouldCheckSpelling); } CheckErrorPage errorPage = analyzeError( initialError.getAlgorithm(), analysis); if ((errorPage.getErrorFound() == false) || (errorPage.getActiveResultsCount() < initialError.getActiveResultsCount())) { errorsFixed.add(new Progress(initialError.getAlgorithm(), errorPage.getErrorFound() == false)); } } } return errorsFixed; } /** * @param errors Errors list. * @param wikipedia Wikipedia. * @param errorNumber Error number. * @param stream Stream containing list of pages for the error number. */ public static void addCheckErrorClassic( List<CheckError> errors, EnumWikipedia wikipedia, int errorNumber, InputStream stream) { // Analyze properties to find informations about error number if (!CheckErrorAlgorithms.isAlgorithmActive(wikipedia, errorNumber)) { return; } // Create error CheckError error = new CheckError(wikipedia, errorNumber); if (stream != null) { BufferedReader reader = null; try { reader = new BufferedReader(new InputStreamReader(stream, "UTF-8")); String line = null; while (((line = reader.readLine()) != null) && !line.endsWith("<pre>")) { // Waiting for <pre> } while (((line = reader.readLine()) != null) && !line.startsWith("</pre>")) { line = line.replaceAll(Pattern.quote("'"), "'"); line = line.replaceAll(Pattern.quote("""), "\""); line = line.replaceAll(Pattern.quote("&"), "&"); error.addPage(line, null); } } catch (UnsupportedEncodingException e) { // } catch (IOException e) { // } finally { if (reader != null) { try { reader.close(); } catch (IOException e) { // } } } } // Add / Replace error for (int i = errors.size(); i > 0; i--) { if (errors.get(i - 1).getErrorNumber() == errorNumber) { errors.remove(i - 1); } } errors.add(error); } /** * @param errors Errors list. * @param wikipedia Wikipedia. * @param errorNumber Error number. * @param stream Stream containing list of pages for the error number. */ public static void addCheckErrorBots( List<CheckError> errors, EnumWikipedia wikipedia, int errorNumber, InputStream stream) { // Analyze properties to find informations about error number if (!CheckErrorAlgorithms.isAlgorithmActive(wikipedia, errorNumber)) { return; } // Create error CheckError error = new CheckError(wikipedia, errorNumber); if (stream != null) { BufferedReader reader = null; try { // Read all lines reader = new BufferedReader(new InputStreamReader(stream, "UTF-8")); ArrayList<String> lines = new ArrayList<String>(); String line = null; while ((line = reader.readLine()) != null) { lines.add(line); } if ((lines.size() == 1) && (lines.get(0).indexOf("\\n") > 0)) { String[] splittedLines = lines.get(0).split("\\\\n"); lines.clear(); for (String splittedLine : splittedLines) { lines.add(splittedLine); } } for (String tmpLine : lines) { String[] elements = tmpLine.split("\\|"); String pageName = null; Integer pageId = null; for (String element : elements) { int equalIndex = element.indexOf("="); if (equalIndex > 0) { String attribute = element.substring(0, equalIndex); if ("title".equals(attribute)) { pageName = element.substring(equalIndex + 1); } else if ("pageid".equals(attribute)) { try { pageId = Integer.valueOf(element.substring(equalIndex + 1)); } catch (NumberFormatException e) { // } } } } // System.err.println("Line: " + tmpLine); if ((pageName != null) && (pageName.trim().length() > 0)) { pageName = pageName.replaceAll(Pattern.quote("'"), "'"); pageName = pageName.replaceAll(Pattern.quote("""), "\""); pageName = pageName.replaceAll(Pattern.quote("&"), "&"); error.addPage(pageName, pageId); } } } catch (UnsupportedEncodingException e) { // } catch (IOException e) { // } finally { if (reader != null) { try { reader.close(); } catch (IOException e) { // } } } } // Add / Replace error for (int i = errors.size(); i > 0; i--) { if (errors.get(i - 1).getErrorNumber() == errorNumber) { errors.remove(i - 1); } } errors.add(error); } /** * @param errors Errors list. * @param wikipedia Wikipedia. * @param errorNumber Error number. * @param pages List of pages in error. */ public static void addCheckErrorPages( List<CheckError> errors, EnumWikipedia wikipedia, int errorNumber, List<Page> pages) { // Analyze properties to find informations about error number if (!CheckErrorAlgorithms.isAlgorithmActive(wikipedia, errorNumber)) { return; } // Check that the list of pages in error is not empty if ((pages == null) || (pages.isEmpty())) { return; } // Create error CheckError error = new CheckError(wikipedia, errorNumber); for (Page page : pages) { error.addPage(page.getTitle(), page.getPageId()); } // Add / Replace error for (int i = errors.size(); i > 0; i--) { if (errors.get(i - 1).getErrorNumber() == errorNumber) { errors.remove(i - 1); } } errors.add(error); } private final EnumWikipedia wikipedia; private final int errorNumber; private final CheckErrorAlgorithm algorithm; private final List<Page> errors; /** * Constructor * * @param wikipedia Wikipedia. * @param errorNumber Error number as defined in the check wikipedia project. */ private CheckError( EnumWikipedia wikipedia, int errorNumber) { this.wikipedia = wikipedia; this.algorithm = CheckErrorAlgorithms.getAlgorithm(wikipedia, errorNumber); this.errorNumber = errorNumber; this.errors = new ArrayList<Page>(); } /** * @return Error number as defined in the check wikipedia project. */ public int getErrorNumber() { return errorNumber; } /** * @return Algorithm. */ public CheckErrorAlgorithm getAlgorithm() { return algorithm; } /** * @return Number of error pages. */ public int getPageCount() { return errors.size(); } /** * @param index Page index. * @return Error page. */ public Page getPage(int index) { if ((index < 0) || (index >= errors.size())) { return null; } return errors.get(index); } /** * Add a page to the list of errors. * * @param page Page. * @param pageId Page id. */ private void addPage(String page, Integer pageId) { Page tmpPage = DataManager.getPage(wikipedia, page, pageId, null, null); if (!errors.contains(tmpPage)) { errors.add(tmpPage); } } /* (non-Javadoc) * @see java.lang.Object#toString() */ @Override public String toString() { String count = Integer.toString(errors.size()); int priority = CWConfigurationError.PRIORITY_UNKOWN; String description = GT._("No available description"); if (algorithm != null) { priority = algorithm.getPriority(); description = algorithm.getShortDescriptionReplaced(); } return GT._("Error n°{0} ({1} - {2}) - {3}", new Object[] { Integer.valueOf(errorNumber), count, CWConfigurationError.getPriorityString(priority), description }); } /** * Remove a page from the list of errors. * * @param page Page. * @return True if a page has been removed from the list. */ public boolean remove(Page page) { if (page == null) { return false; } boolean removed = false; synchronized (errors) { for (int i = errors.size(); i > 0; i--) { if (Page.areSameTitle(page.getTitle(), errors.get(i - 1).getTitle())) { errors.remove(i - 1); removed = true; } } } return removed; } /** * Bean for holding progress report on fixing errors. */ public static class Progress { /** Algorithm */ final public CheckErrorAlgorithm algorithm; /** True if error has been completely fixed */ final public boolean full; /** * @param algorithm Algorithm. * @param full True if error has been completely fixed. */ public Progress(CheckErrorAlgorithm algorithm, boolean full) { this.algorithm = algorithm; this.full = full; } } }