/*
* WPCleaner: A tool to help on Wikipedia maintenance tasks.
* Copyright (C) 2013 Nicolas Vervelle
*
* See README.txt file for licensing information.
*/
package org.wikipediacleaner.api.data;
import java.util.List;
/**
* Class containing information about an element of the page.
*/
public abstract class PageElement {
private final int beginIndex;
private final int endIndex;
/**
* @param beginIndex Beginning of the page element.
* @param endIndex End of the page element.
*/
public PageElement(int beginIndex, int endIndex) {
this.beginIndex = beginIndex;
this.endIndex = endIndex;
}
/**
* @return Beginning of the page element.
*/
public int getBeginIndex() {
return beginIndex;
}
/**
* @return End of the page element.
*/
public int getEndIndex() {
return endIndex;
}
/**
* Group consecutive elements.
*
* @param elements List of elements.
* @param firstIndex Index of first element in the list.
* @param contents Page contents.
* @param punctuation Possible punctuation between elements.
* @param separator Possible separator between elements.
* @return Index of last element in the group of consecutive elements.
*/
public static int groupElements(
List<PageElement> elements,
int firstIndex,
String contents,
String punctuation, String separator) {
if (elements == null) {
return firstIndex;
}
int elementIndex = firstIndex;
while (elementIndex + 1 < elements.size()) {
int nextBeginIndex = elements.get(elementIndex + 1).getBeginIndex();
int currentIndex = elements.get(elementIndex).getEndIndex();
boolean separatorFound = false;
while (currentIndex < nextBeginIndex) {
if (!separatorFound &&
(separator != null) &&
contents.startsWith(separator, currentIndex)) {
separatorFound = true;
currentIndex += separator.length();
} else if (contents.startsWith(" ", currentIndex)) {
currentIndex += " ".length();
} else if (!Character.isWhitespace(contents.charAt(currentIndex)) &&
((punctuation == null) ||
(punctuation.indexOf(contents.charAt(currentIndex)) < 0))) {
return elementIndex;
} else {
currentIndex++;
}
}
elementIndex++;
}
return elementIndex;
}
/**
* Create a textual representation of a list of elements.
*
* @param elements List of elements.
* @param firstIndex Index of first element in the list.
* @param lastIndex Index of last element in the list.
* @param contents Page contents.
* @param separator Separator.
* @return Textual representation of a list of elements.
*/
public static String createListOfElements(
List<PageElement> elements,
int firstIndex, int lastIndex,
String contents, String separator) {
StringBuilder buffer = new StringBuilder();
int index = firstIndex;
while (index <= lastIndex) {
if ((index > firstIndex) && (separator != null)) {
buffer.append(separator);
}
int beginIndex = elements.get(index).getBeginIndex();
int endIndex = elements.get(index).getEndIndex();
index++;
buffer.append(contents.substring(beginIndex, endIndex));
}
return buffer.toString();
}
}