/*
* WPCleaner: A tool to help on Wikipedia maintenance tasks.
* Copyright (C) 2013 Nicolas Vervelle
*
* See README.txt file for licensing information.
*/
package org.wikipediacleaner.api.data;
import java.util.ArrayList;
import java.util.List;
import org.wikipediacleaner.api.constants.EnumWikipedia;
/**
 * Class containing information about a complete function ({{<i>function</i>:...}}).
 *
 * Instances are created through {@link #analyzeBlock(EnumWikipedia, String, int, List, List)},
 * which parses the page contents starting at a given index.
 * A function recognized without any parameter keeps a <code>null</code> parameter list:
 * every accessor treats that case as "no parameters".
 */
public class PageElementFunction extends PageElement {

  /** Magic word matched by the function name. */
  private final MagicWord magicWord;

  /** Function name, trimmed. */
  private final String functionName;

  /** Function name as it was given to the constructor. */
  private final String functionNameNotTrimmed;

  /** Parameters of the function, or <code>null</code> if the function has none. */
  private final List<Parameter> parameters;

  /** Characters that end the function name when scanning the page contents. */
  private static final String FUNCTION_NAME_UNAUTHORIZED_CHARACTERS = "{}[]|<>:";

  /**
   * Class containing information about a function parameter.
   */
  private static class Parameter {

    /** Index in page contents of the separator preceding the parameter. */
    final int separatorIndex;

    /** Parameter name, trimmed (empty for an unnamed parameter). */
    final String name;

    /** Index of parameter name in page contents. */
    final int nameStartIndex;

    /** Parameter value, exactly as extracted. */
    final String valueNotTrimmed;

    /** Parameter value, trimmed. */
    final String value;

    /** Index of parameter value in page contents. */
    final int valueStartIndex;

    /**
     * @param separatorIndex Index of the separator in page contents.
     * @param name Parameter name.
     * @param nameStartIndex Index of parameter name in page contents.
     * @param value Parameter value.
     * @param valueStartIndex Index of parameter value in page contents.
     */
    public Parameter(
        int separatorIndex,
        String name, int nameStartIndex,
        String value, int valueStartIndex) {
      this.separatorIndex = separatorIndex;
      this.name = (name != null) ? name.trim() : null;
      this.nameStartIndex = nameStartIndex;
      this.valueNotTrimmed = value;
      this.value = (value != null) ? value.trim() : null;
      this.valueStartIndex = valueStartIndex;
    }

    /* (non-Javadoc)
     * @see java.lang.Object#toString()
     */
    @Override
    public String toString() {
      if ((name != null) && (!name.isEmpty())) {
        return name + "=" + value;
      }
      return value;
    }
  }

  /**
   * Analyze contents to check if it matches a block.
   *
   * @param wiki Wiki.
   * @param contents Contents.
   * @param index Block start index.
   * @param comments Comments in the page.
   * @param tags Tags in the page.
   * @return Block details if there's a block, <code>null</code> otherwise.
   */
  public static PageElementFunction analyzeBlock(
      EnumWikipedia wiki,
      String contents, int index,
      List<PageElementComment> comments,
      List<PageElementTag> tags) {

    // Verify arguments (a negative index can't be the start of a block)
    if ((contents == null) || (index < 0)) {
      return null;
    }

    // Look for '{{'
    int beginIndex = index;
    int tmpIndex = beginIndex;
    if ((tmpIndex >= contents.length() - 1) ||
        (contents.charAt(tmpIndex) != '{') ||
        (contents.charAt(tmpIndex + 1) != '{')) {
      return null;
    }
    tmpIndex += 2;

    // Possible whitespace characters and comments before the function name
    tmpIndex = skipWhitespaceAndComments(contents, tmpIndex, comments);
    if (tmpIndex < 0) {
      return null;
    }
    int startFunctionName = tmpIndex;

    // Retrieve function name: everything up to the first unauthorized character
    while (tmpIndex < contents.length()) {
      char currentChar = contents.charAt(tmpIndex);
      if (FUNCTION_NAME_UNAUTHORIZED_CHARACTERS.indexOf(currentChar) >= 0) {
        break;
      }
      tmpIndex++;
    }
    if (tmpIndex >= contents.length()) {
      return null;
    }
    String functionName = contents.substring(startFunctionName, tmpIndex).trim();
    if (functionName.length() == 0) {
      return null;
    }

    // Check that it's a function: the lookup is told whether
    // the name is immediately followed by a colon
    MagicWord magicWord = wiki.getWikiConfiguration().getFunctionMagicWord(
        functionName, contents.charAt(tmpIndex) == ':');
    if (magicWord == null) {
      return null;
    }

    // Possible comments and whitespace characters after the function name
    tmpIndex = skipWhitespaceAndComments(contents, tmpIndex, comments);
    if (tmpIndex < 0) {
      return null;
    }

    // Check if it's a function without parameters
    if (contents.startsWith("}}", tmpIndex)) {
      return new PageElementFunction(
          magicWord, functionName,
          beginIndex, tmpIndex + 2, null);
    }

    // Check if it's a function: a trailing comment may have moved the index
    // to the end of the contents, in which case nothing is left to check
    if ((tmpIndex >= contents.length()) ||
        (contents.charAt(tmpIndex) != ':')) {
      return null;
    }

    // Analyze parameters
    tmpIndex++;
    List<Parameter> parameters = new ArrayList<Parameter>();
    int endIndex = analyzeFunctionParameters(
        wiki, contents, beginIndex, tmpIndex - 1, tmpIndex, parameters,
        comments, tags);
    if (endIndex < 0) {
      return null;
    }
    return new PageElementFunction(
        magicWord, functionName,
        beginIndex, endIndex, parameters);
  }

  /**
   * Skip spaces, line feeds and comments.
   *
   * @param contents Contents of the page.
   * @param index Index where to start skipping.
   * @param comments Comments in the page.
   * @return Index of the first character that is neither a space, a line feed nor in a comment,
   *         or -1 if a '&lt;' is found that is not the beginning of a known comment.
   */
  private static int skipWhitespaceAndComments(
      String contents, int index,
      List<PageElementComment> comments) {
    int tmpIndex = index;
    while (true) {

      // Possible whitespace characters
      while ((tmpIndex < contents.length()) &&
             ((contents.charAt(tmpIndex) == ' ') ||
              (contents.charAt(tmpIndex) == '\n'))) {
        tmpIndex++;
      }

      // Possible comment
      if ((tmpIndex >= contents.length()) ||
          (contents.charAt(tmpIndex) != '<')) {
        return tmpIndex;
      }
      PageElementComment comment = findComment(comments, tmpIndex);
      if (comment == null) {
        return -1;
      }
      tmpIndex = comment.getEndIndex();
    }
  }

  /**
   * Find the comment beginning at a given index.
   *
   * @param comments Comments in the page (can be <code>null</code>).
   * @param index Index in page contents.
   * @return Comment beginning at the index, or <code>null</code> if there's none.
   */
  private static PageElementComment findComment(
      List<PageElementComment> comments, int index) {
    PageElementComment comment = null;
    if (comments != null) {
      // No early exit: if several comments begin at the index, the last one is kept
      for (PageElementComment tmpComment : comments) {
        if (tmpComment.getBeginIndex() == index) {
          comment = tmpComment;
        }
      }
    }
    return comment;
  }

  /**
   * Find the tag beginning at a given index.
   *
   * @param tags Tags in the page (can be <code>null</code>).
   * @param index Index in page contents.
   * @return Tag beginning at the index, or <code>null</code> if there's none.
   */
  private static PageElementTag findTag(
      List<PageElementTag> tags, int index) {
    PageElementTag tag = null;
    if (tags != null) {
      // No early exit: if several tags begin at the index, the last one is kept
      for (PageElementTag tmpTag : tags) {
        if (tmpTag.getBeginIndex() == index) {
          tag = tmpTag;
        }
      }
    }
    return tag;
  }

  /**
   * Analyze the parameters of function.
   *
   * The contents are scanned while tracking the nesting depth of "{{{", "{{", "[[",
   * nowiki tags and ref tags. A "|" or the first "=" of a parameter is only taken into account
   * outside of "{{", "[[", nowiki and ref nesting.
   *
   * @param wiki Wiki.
   * @param contents Contents of the page.
   * @param functionBeginIndex Start index of the function in the page.
   * @param separatorIndex Index of the previous separator.
   * @param parametersBeginIndex Start index of the parameters in the page.
   * @param parameters Parameters.
   * @param comments Comments in the page.
   * @param tags Tags in the page.
   * @return Position of the end of the function, or -1 if no function was found.
   */
  private static int analyzeFunctionParameters(
      EnumWikipedia wiki, String contents,
      int functionBeginIndex, int separatorIndex, int parametersBeginIndex,
      List<Parameter> parameters,
      List<PageElementComment> comments,
      List<PageElementTag> tags) {
    if (contents == null) {
      return -1;
    }
    int tmpIndex = parametersBeginIndex;
    int maxLength = contents.length();
    int depth2CurlyBrackets = 0;
    int depth3CurlyBrackets = 0;
    int depth2SquareBrackets = 0;
    int depthTagNoWiki = 0;
    int depthTagRef = 0;
    int parameterBeginIndex = parametersBeginIndex;
    int equalIndex = -1; // Index in contents of the first "=" of the current parameter
    while (tmpIndex < maxLength) {
      if (contents.startsWith("{{{", tmpIndex)) {
        // Possible start of a parameter
        tmpIndex += 3;
        if (depthTagNoWiki == 0) {
          depth3CurlyBrackets++;
        }
      } else if (contents.startsWith("{{", tmpIndex)) {
        // Possible start of nested template
        tmpIndex += 2;
        if (depthTagNoWiki == 0) {
          depth2CurlyBrackets++;
        }
      } else if (contents.startsWith("}}", tmpIndex)) {
        if (contents.startsWith("}}}", tmpIndex) &&
            (depth3CurlyBrackets > 0)) {
          // Possible end of parameter
          tmpIndex += 3;
          if (depthTagNoWiki == 0) {
            depth3CurlyBrackets--;
          }
        } else {
          // Possible end of function
          tmpIndex += 2;
          if (depthTagNoWiki == 0) {
            if (depth2CurlyBrackets > 0) {
              depth2CurlyBrackets--;
            } else {
              // End of the function itself: register the last parameter
              addParameter(
                  parameters, separatorIndex,
                  contents.substring(parameterBeginIndex, tmpIndex - 2),
                  equalIndex - parameterBeginIndex,
                  parameterBeginIndex);
              return tmpIndex;
            }
          }
        }
      } else if (contents.startsWith("[[", tmpIndex)) {
        // Possible start of nested internal links
        tmpIndex += 2;
        if (depthTagNoWiki == 0) {
          depth2SquareBrackets++;
        }
      } else if (contents.startsWith("]]", tmpIndex)) {
        // Possible end of nested internal link
        tmpIndex += 2;
        if (depthTagNoWiki == 0) {
          if (depth2SquareBrackets > 0) {
            depth2SquareBrackets--;
          } else {
            // Closing an internal link that was never opened
            return -1;
          }
        }
      } else if (contents.startsWith("<", tmpIndex)) {
        // Possible start of a tag
        PageElementTag tag = findTag(tags, tmpIndex);
        if (tag != null) {
          // Effect of the tag on the nesting depth
          int count = 0;
          if (tag.isFullTag()) {
            count = 0;
          } else if (tag.isEndTag()) {
            count = -1;
          } else {
            count = 1;
          }
          if (PageElementTag.TAG_WIKI_NOWIKI.equals(tag.getName())) {
            depthTagNoWiki += count;
            if (depthTagNoWiki < 0) {
              depthTagNoWiki = 0;
            }
          } else if (PageElementTag.TAG_WIKI_REF.equals(tag.getName())) {
            if (depthTagNoWiki == 0) {
              depthTagRef += count;
              if (depthTagRef < 0) {
                depthTagRef = 0;
              }
            }
          }
          tmpIndex = tag.getEndIndex();
        } else {
          // Possible start of a comment
          PageElementComment comment = findComment(comments, tmpIndex);
          if (comment != null) {
            tmpIndex = comment.getEndIndex();
          } else {
            tmpIndex++;
          }
        }
      } else {
        if ((depth2CurlyBrackets <= 0) &&
            (depth2SquareBrackets <= 0) &&
            (depthTagNoWiki <= 0) &&
            (depthTagRef <= 0)) {
          char currentChar = contents.charAt(tmpIndex);
          if (currentChar == '|') {
            // Separation with next parameter
            depth2CurlyBrackets = 0;
            depth2SquareBrackets = 0;
            addParameter(
                parameters, separatorIndex,
                contents.substring(parameterBeginIndex, tmpIndex),
                equalIndex - parameterBeginIndex,
                parameterBeginIndex);
            separatorIndex = tmpIndex;
            tmpIndex++;
            parameterBeginIndex = tmpIndex;
            equalIndex = -1;
          } else if ((currentChar == '=') && (equalIndex < 0)) {
            // Only the first "=" separates the name from the value
            equalIndex = tmpIndex;
            tmpIndex++;
          } else {
            tmpIndex++;
          }
        } else {
          tmpIndex++;
        }
      }
    }
    return -1;
  }

  /**
   * Add a parameter to the list of parameters.
   *
   * @param parameters List of parameters.
   * @param separatorIndex Index of separator.
   * @param parameter New parameter (name=value or value).
   * @param equalIndex Index of "=" in the parameter or negative if doesn't exist.
   * @param offset Offset of parameter start index in page contents.
   */
  private static void addParameter(
      List<Parameter> parameters,
      int separatorIndex, String parameter,
      int equalIndex, int offset) {
    if (equalIndex < 0) {
      // Unnamed parameter: name and value offsets both point after the leading whitespace
      int spaces = 0;
      while ((spaces < parameter.length()) &&
             (Character.isWhitespace(parameter.charAt(spaces)))) {
        spaces++;
      }
      parameters.add(new Parameter(
          separatorIndex, "", offset + spaces, parameter, offset + spaces));
    } else {
      // Named parameter: offsets skip the whitespace before the name and before the value
      int spacesName = 0;
      while ((spacesName < equalIndex) &&
             (Character.isWhitespace(parameter.charAt(spacesName)))) {
        spacesName++;
      }
      int spacesValue = equalIndex + 1;
      while ((spacesValue < parameter.length()) &&
             (Character.isWhitespace(parameter.charAt(spacesValue)))) {
        spacesValue++;
      }
      parameters.add(new Parameter(
          separatorIndex,
          parameter.substring(0, equalIndex), offset + spacesName,
          parameter.substring(equalIndex + 1), offset + spacesValue));
    }
  }

  /**
   * @return Magic word.
   */
  public MagicWord getMagicWord() {
    return magicWord;
  }

  /**
   * @return Function name.
   */
  public String getFunctionName() {
    return functionName;
  }

  /**
   * Get parameter count.
   *
   * @return Parameter count.
   */
  public int getParameterCount() {
    if (parameters == null) {
      return 0;
    }
    return parameters.size();
  }

  /**
   * Retrieve a parameter.
   *
   * @param index Parameter index.
   * @return Parameter, or <code>null</code> if the function has no parameters
   *         or if the index is out of range.
   */
  private Parameter getParameter(int index) {
    if ((parameters == null) ||
        (index < 0) ||
        (index >= parameters.size())) {
      return null;
    }
    return parameters.get(index);
  }

  /**
   * Retrieve separator offset.
   *
   * @param index Parameter index.
   * @return Separator offset (0 if there's no such parameter).
   */
  public int getParameterSeparatorOffset(int index) {
    Parameter parameter = getParameter(index);
    return (parameter != null) ? parameter.separatorIndex : 0;
  }

  /**
   * Retrieve parameter name.
   *
   * @param index Parameter index.
   * @return Parameter name (<code>null</code> if there's no such parameter).
   */
  public String getParameterName(int index) {
    Parameter parameter = getParameter(index);
    return (parameter != null) ? parameter.name : null;
  }

  /**
   * Retrieve parameter name offset.
   *
   * @param index Parameter index.
   * @return Parameter name offset (0 if there's no such parameter).
   */
  public int getParameterNameOffset(int index) {
    Parameter parameter = getParameter(index);
    return (parameter != null) ? parameter.nameStartIndex : 0;
  }

  /**
   * Retrieve parameter value.
   *
   * @param index Parameter index.
   * @return Parameter value (<code>null</code> if there's no such parameter).
   */
  public String getParameterValue(int index) {
    Parameter parameter = getParameter(index);
    return (parameter != null) ? parameter.value : null;
  }

  /**
   * Retrieve parameter value not trimmed.
   *
   * @param index Parameter index.
   * @return Parameter value not trimmed (<code>null</code> if there's no such parameter).
   */
  public String getParameterValueNotTrimmed(int index) {
    Parameter parameter = getParameter(index);
    return (parameter != null) ? parameter.valueNotTrimmed : null;
  }

  /**
   * Retrieve parameter value offset.
   *
   * @param index Parameter index.
   * @return Parameter value offset (0 if there's no such parameter).
   */
  public int getParameterValueOffset(int index) {
    Parameter parameter = getParameter(index);
    return (parameter != null) ? parameter.valueStartIndex : 0;
  }

  /**
   * Retrieve parameter value.
   *
   * Parameters without a name are matched by their position, counted from 1.
   *
   * @param name Parameter name.
   * @return Parameter value (<code>null</code> if there's no such parameter).
   */
  public String getParameterValue(String name) {
    if ((parameters == null) || (name == null)) {
      return null;
    }
    int paramNum = 1; // Implicit name given to the next unnamed parameter
    for (Parameter parameter : parameters) {
      String parameterName = parameter.name;
      if ((parameterName == null) || (parameterName.length() == 0)) {
        parameterName = Integer.toString(paramNum);
      }
      // A parameter explicitly named with the expected number also consumes that number
      if (parameterName.equals(Integer.toString(paramNum))) {
        paramNum++;
      }
      if (name.equals(parameterName)) {
        return parameter.value;
      }
    }
    return null;
  }

  /**
   * @param magicWord Magic word.
   * @param functionName Function name.
   * @param beginIndex Begin index of the function in page contents.
   * @param endIndex End index of the function in page contents.
   * @param parameters Parameters (<code>null</code> if the function has no parameters).
   */
  private PageElementFunction(
      MagicWord magicWord, String functionName,
      int beginIndex, int endIndex,
      List<Parameter> parameters) {
    super(beginIndex, endIndex);
    this.magicWord = magicWord;
    this.functionNameNotTrimmed = functionName;
    this.functionName = (functionName != null) ? functionName.trim() : null;
    this.parameters = parameters;
  }

  /**
   * Append the opening brackets and the function name.
   *
   * @param sb Buffer receiving the text.
   */
  private void addPartBeforeParameters(StringBuilder sb) {
    sb.append("{{");
    sb.append(functionNameNotTrimmed);
  }

  /**
   * Append the parameters and the closing brackets.
   *
   * @param sb Buffer receiving the text.
   */
  private void addPartFromParameters(StringBuilder sb) {
    // A function without parameters has no parameter list at all
    if (parameters != null) {
      for (Parameter parameter : parameters) {
        addParameter(sb, parameter.name, parameter.value);
      }
    }
    sb.append("}}");
  }

  /**
   * Append one parameter, preceded by a "|" separator.
   *
   * NOTE(review): the first parameter is also preceded by "|" whereas the parser
   * expects ":" after the function name; confirm whether this output is meant to be
   * valid wiki text or is only used for display.
   *
   * @param sb Buffer receiving the text.
   * @param parameterName Parameter name (can be empty or <code>null</code>).
   * @param parameterValue Parameter value.
   */
  private void addParameter(StringBuilder sb, String parameterName, String parameterValue) {
    sb.append('|');
    if ((parameterName != null) && (parameterName.trim().length() > 0)) {
      sb.append(parameterName);
      sb.append('=');
    }
    sb.append(parameterValue);
  }

  /* (non-Javadoc)
   * @see java.lang.Object#toString()
   */
  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    addPartBeforeParameters(sb);
    addPartFromParameters(sb);
    return sb.toString();
  }

  /**
   * Create the textual representation of a function.
   *
   * A ":" is inserted between the name and the value, unless the name is empty
   * or already ends with one. The value is trimmed.
   *
   * @param name Function name (must not be <code>null</code>).
   * @param value Value of the function (<code>null</code> for no value).
   * @return Textual representation of the function.
   */
  public static String createFunction(String name, String value) {
    StringBuilder sb = new StringBuilder();
    sb.append("{{");
    sb.append(name);
    if (value != null) {
      if ((name.length() > 0) && (name.charAt(name.length() - 1) != ':')) {
        sb.append(':');
      }
      sb.append(value.trim());
    }
    sb.append("}}");
    return sb.toString();
  }
}