/*
* WPCleaner: A tool to help on Wikipedia maintenance tasks.
* Copyright (C) 2013 Nicolas Vervelle
*
* See README.txt file for licensing information.
*/
package org.wikipediacleaner.api.data;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Utility methods for building regular expressions that match templates
 * in wiki text and for extracting the parameters of a matched template.
 */
public class PageUtilities {

  /**
   * Creates a Pattern for matching templates.
   * <p>
   * The resulting expression matches <code>{{ Title }}</code> or
   * <code>{{ Title | parameters }}</code>. The first capturing group holds
   * everything between the braces; the last capturing group holds the text
   * after the first pipe (it is <code>null</code> when there is no pipe).
   *
   * @param template The interesting template.
   * @return Pattern, or <code>null</code> if <code>template</code> is <code>null</code>.
   */
  public static Pattern createPatternForTemplate(TemplateMatch template) {
    if (template == null) {
      return null;
    }
    String title = template.getName();

    // Create the regular expression
    StringBuilder expression = new StringBuilder();
    expression.append("\\{\\{"); // {{
    expression.append("(");
    addPatternForWhiteSpaces(expression);
    addPatternForTitle(expression, title);
    addPatternForWhiteSpaces(expression);
    expression.append("(?:\\|(" +
        "(?:" +
        "(?:[^\\{\\}]" + "*+" + ")" + // Parameters text
        "|" +
        "(?:\\{\\{\\!\\}\\})" + // Special {{!}}
        ")*?" +
        "))?"); // Possible parameters
    expression.append(")\\}\\}"); // }}
    return Pattern.compile(expression.toString());
  }

  /**
   * Retrieve template parameters from a successful match.
   * <p>
   * The parameters text is read from the last capturing group of the
   * matcher; when that group did not participate in the match, the
   * default parameters of the template are used instead.
   *
   * @param template Template matcher.
   * @param matcher Current matcher (must hold a successful match).
   * @param page Page.
   * @return Template parameters, or <code>null</code> if there are none.
   */
  public static List<TemplateParameter> analyzeTemplateParameters(
      TemplateMatch template, Matcher matcher, Page page) {
    String text = matcher.group(matcher.groupCount());
    if (text == null) {
      text = template.getDefaultParameters();
    }
    return analyzeTemplateParameters(template, text, page);
  }

  /**
   * Retrieve template parameters.
   * <p>
   * The text is split on <code>|</code>. Each piece containing a
   * <code>=</code> is a named parameter (name before the first
   * <code>=</code>, value after it); other pieces are positional
   * parameters named "1", "2", ... in order of appearance.
   * When a page is given, <code>{{PAGENAME}}</code> and
   * <code>{{pagename}}</code> are replaced by the page title.
   *
   * @param template Template matcher.
   * @param text Text.
   * @param page Page (may be <code>null</code>).
   * @return Template parameters, or <code>null</code> if there are none.
   */
  public static List<TemplateParameter> analyzeTemplateParameters(
      TemplateMatch template, String text, Page page) {
    if (text == null) {
      return null;
    }
    String[] parameters = text.split("\\|");
    if (parameters.length == 0) {
      return null;
    }
    List<TemplateParameter> result = new ArrayList<TemplateParameter>(parameters.length);
    int currentParam = 0;
    for (int param = 0; param < parameters.length; param++) {
      String parameter = parameters[param];

      // Literal replacement: the title must not be interpreted as a regular
      // expression replacement string ('$' and '\' are legal in titles).
      if (page != null) {
        parameter = parameter.replace("{{PAGENAME}}", page.getTitleUcFirst());
        parameter = parameter.replace("{{pagename}}", page.getTitleLcFirst());
      }

      // Split on the first '=' only, so that values may be empty
      // or contain '=' themselves.
      String paramName = null;
      String paramValue = null;
      int equalIndex = parameter.indexOf('=');
      if (equalIndex < 0) {
        currentParam++;
        paramName = Integer.toString(currentParam);
        paramValue = parameter;
      } else {
        paramName = parameter.substring(0, equalIndex);
        paramValue = parameter.substring(equalIndex + 1);
      }

      // Flag the parameter if the template declares it
      TemplateParameter templateParam = new TemplateParameter(paramName, paramValue);
      for (int p = 0; p < template.getParametersCount(); p++) {
        if (paramName.equals(template.getParameter(p))) {
          templateParam.setRelevant(true);
          break;
        }
      }
      result.add(templateParam);
    }
    return result;
  }

  /**
   * Appends to the expression a pattern matching a page title.
   * <p>
   * The first non-space character matches in either case, each space of
   * the title matches a space or an underscore, and everything else is
   * matched literally. Nothing is appended for a null or empty title.
   *
   * @param expression Pattern being created.
   * @param title Page title.
   */
  public static void addPatternForTitle(StringBuilder expression, String title) {
    if ((title == null) || (title.length() == 0)) {
      return;
    }
    boolean firstCharacterPending = true;
    int index = 0;
    while (index < title.length()) {

      // Each space of the title can be written as a space or an underscore
      if (title.charAt(index) == ' ') {
        expression.append("[ _]");
        index++;
        continue;
      }

      // Find the end of the current word
      int end = title.indexOf(' ', index);
      if (end < 0) {
        end = title.length();
      }

      // The very first character of the title is case insensitive
      int literalBegin = index;
      if (firstCharacterPending) {
        appendFirstCharacter(expression, title.charAt(index));
        literalBegin++;
        firstCharacterPending = false;
      }
      expression.append(Pattern.quote(title.substring(literalBegin, end)));
      index = end;
    }
  }

  /**
   * Appends a pattern matching a character in lower case or upper case.
   *
   * @param expression Pattern being created.
   * @param character Character to match.
   */
  private static void appendFirstCharacter(StringBuilder expression, char character) {
    char lower = Character.toLowerCase(character);
    char upper = Character.toUpperCase(character);
    if (lower == upper) {
      // No case distinction: the character may be a regular expression
      // meta character (like '^' or '\'), so it has to be quoted rather
      // than put inside a character class.
      expression.append(Pattern.quote(String.valueOf(character)));
      return;
    }
    expression.append("[");
    expression.append(lower);
    expression.append(upper);
    if ((character != lower) && (character != upper)) {
      expression.append(character); // Neither form (e.g. title case letter)
    }
    expression.append("]");
  }

  /**
   * Appends to the expression a pattern matching optional white spaces.
   *
   * @param expression Pattern being created.
   */
  public static void addPatternForWhiteSpaces(StringBuilder expression) {
    expression.append("\\s*+"); // Possible white characters
    // Note: the quantifier is possessive ("*+"), so the white spaces that
    // have been consumed are never given back when the matcher backtracks.
  }
}