/*
* WPCleaner: A tool to help on Wikipedia maintenance tasks.
* Copyright (C) 2013 Nicolas Vervelle
*
* See README.txt file for licensing information.
*/
package org.wikipediacleaner.api.data;
import java.util.List;
import org.wikipediacleaner.api.constants.EnumWikipedia;
/**
 * Class containing information about a title (== Title ==).
 *
 * A title is recognized as a run of "=" at the beginning of a line, followed
 * by the title text, followed by a closing run of "=". After the closing run,
 * only whitespace, comments and complete ref tags are accepted before the end
 * of the line.
 */
public class PageElementTitle extends PageElement {

  /** Number of "=" before the title text. */
  private final int firstLevel;

  /** Number of "=" after the title text. */
  private final int secondLevel;

  /** Title text exactly as found between the opening and closing "=". */
  private final String titleNotTrimmed;

  /** Title text with leading and trailing whitespace removed. */
  private final String title;

  /** Text found between the last "=" and the end of the element. */
  private final String afterTitleNotTrimmed;

  /** True if the element contains a line break. */
  private final boolean multiline;

  /**
   * Analyze contents to check if it matches a title.
   *
   * @param wikipedia Wikipedia (currently not used by the analysis).
   * @param contents Contents.
   * @param index Block start index.
   * @param comments Comments in the page (<code>null</code> is handled as no comments).
   * @param tags Tags in the page (<code>null</code> is handled as no tags).
   * @return Block details if there's a block, <code>null</code> otherwise.
   */
  public static PageElementTitle analyzeBlock(
      EnumWikipedia wikipedia, String contents, int index,
      List<PageElementComment> comments,
      List<PageElementTag> tags) {

    // Verify arguments
    if (contents == null) {
      return null;
    }
    int maxLength = contents.length();
    if ((index < 0) || (index >= maxLength)) {
      return null;
    }

    // Check that the block is at the beginning of a line
    int beginIndex = index;
    if (!isAtBeginningOfLine(contents, beginIndex, comments)) {
      return null;
    }

    // Compute first title level
    int firstLevel = 0;
    while ((index < maxLength) && (contents.charAt(index) == '=')) {
      index++;
      firstLevel++;
    }
    if (firstLevel == 0) {
      // Not an equal sign: this can't be a title
      return null;
    }
    if (index >= maxLength) {
      // Only equal signs until the end of the contents
      return null;
    }
    int beginTitleIndex = index;

    // Analyze title
    boolean endFound = false; // True while the closing "=" are the last significant text
    boolean jump = false;     // True if a comment or a reference has been skipped
    int secondLevel = 0;
    int lastEqualIndex = index;
    int endTitleIndex = index;
    while ((index < maxLength) && (contents.charAt(index) != '\n')) {
      char currentChar = contents.charAt(index);
      int nextIndex = index + 1;
      if (Character.isWhitespace(currentChar)) {
        // Nothing to do, continue
      } else if (currentChar == '=') {
        // Equal sign, possible end of title
        if (!endFound) {
          endTitleIndex = index;
          endFound = true;
          secondLevel = 0;
        }
        secondLevel++;
        lastEqualIndex = index;
      } else if (currentChar == '<') {
        // Comments and complete references are skipped as a whole,
        // they don't change the current state of the analysis
        PageElementComment comment = findCommentBeginningAt(comments, index);
        PageElementTag ref = null;
        if (comment == null) {
          ref = findCompleteRefBeginningAt(tags, index);
        }
        if (comment != null) {
          nextIndex = comment.getEndIndex();
          jump = true;
        } else if (ref != null) {
          nextIndex = ref.getCompleteEndIndex();
          jump = true;
        } else {
          // Regular text: equal signs found previously were part of the title
          endFound = false;
        }
      } else {
        // Regular text: equal signs found previously were part of the title
        endFound = false;
      }
      index = nextIndex;
    }
    int endIndex = index;
    if (!endFound) {
      return null;
    }

    // A line break can only be inside the element if something has been skipped
    boolean multiline = false;
    if (jump) {
      int newLineIndex = contents.indexOf('\n', beginIndex);
      multiline = (newLineIndex >= 0) && (newLineIndex < endIndex);
    }

    return new PageElementTitle(
        beginIndex, endIndex,
        firstLevel, secondLevel,
        contents.substring(beginTitleIndex, endTitleIndex),
        contents.substring(lastEqualIndex + 1, endIndex),
        multiline);
  }

  /**
   * Check if a position is at the beginning of a line, ignoring comments
   * that are located between the beginning of the line and the position.
   *
   * @param contents Contents.
   * @param index Position to check.
   * @param comments Comments in the page (can be <code>null</code>).
   * @return True if only comments are between the line beginning and the position.
   */
  private static boolean isAtBeginningOfLine(
      String contents, int index,
      List<PageElementComment> comments) {
    int current = index;
    while (current > 0) {
      char previous = contents.charAt(current - 1);
      if (previous == '\n') {
        return true;
      }
      if (previous != '>') {
        return false;
      }

      // A ">" is accepted only if it's the end of a comment
      PageElementComment comment = findCommentEndingAt(comments, current);
      if (comment == null) {
        return false;
      }
      current = comment.getBeginIndex();
    }
    return true;
  }

  /**
   * @param comments Comments in the page (can be <code>null</code>).
   * @param endIndex End index.
   * @return Last comment in the list ending at the given index, or <code>null</code>.
   */
  private static PageElementComment findCommentEndingAt(
      List<PageElementComment> comments, int endIndex) {
    PageElementComment result = null;
    if (comments != null) {
      for (PageElementComment comment : comments) {
        if (comment.getEndIndex() == endIndex) {
          result = comment;
        }
      }
    }
    return result;
  }

  /**
   * @param comments Comments in the page (can be <code>null</code>).
   * @param beginIndex Begin index.
   * @return Last comment in the list beginning at the given index, or <code>null</code>.
   */
  private static PageElementComment findCommentBeginningAt(
      List<PageElementComment> comments, int beginIndex) {
    PageElementComment result = null;
    if (comments != null) {
      for (PageElementComment comment : comments) {
        if (comment.getBeginIndex() == beginIndex) {
          result = comment;
        }
      }
    }
    return result;
  }

  /**
   * @param tags Tags in the page (can be <code>null</code>).
   * @param beginIndex Begin index.
   * @return Last complete ref tag in the list beginning at the given index, or <code>null</code>.
   */
  private static PageElementTag findCompleteRefBeginningAt(
      List<PageElementTag> tags, int beginIndex) {
    PageElementTag result = null;
    if (tags != null) {
      for (PageElementTag tag : tags) {
        if ((tag.getBeginIndex() == beginIndex) &&
            PageElementTag.TAG_WIKI_REF.equals(tag.getName()) &&
            (tag.isComplete())) {
          result = tag;
        }
      }
    }
    return result;
  }

  /**
   * @return Title level (the smallest of the numbers of "=" before and after the title).
   */
  public int getLevel() {
    return Math.min(firstLevel, secondLevel);
  }

  /**
   * @return True if there's nothing questionable about this title
   *         (same number of "=" before and after the title).
   */
  public boolean isCoherent() {
    return (firstLevel == secondLevel);
  }

  /**
   * @return Number of "=" before the title.
   */
  public int getFirstLevel() {
    return firstLevel;
  }

  /**
   * @return Number of "=" after the title.
   */
  public int getSecondLevel() {
    return secondLevel;
  }

  /**
   * @return Title itself (trimmed).
   */
  public String getTitle() {
    return title;
  }

  /**
   * @return Title not trimmed.
   */
  public String getTitleNotTrimmed() {
    return titleNotTrimmed;
  }

  /**
   * @return Text after title (not trimmed).
   */
  public String getAfterTitle() {
    return afterTitleNotTrimmed;
  }

  /**
   * @return True if title spans on several lines.
   */
  public boolean isMultiline() {
    return multiline;
  }

  /**
   * @param beginIndex Begin index.
   * @param endIndex End index.
   * @param firstLevel Title level (using the first "=")
   * @param secondLevel Title level (using the last "=")
   * @param title Title itself (not trimmed).
   * @param afterTitle Text after the title.
   * @param multiline True if title spans on several lines.
   */
  private PageElementTitle(
      int beginIndex, int endIndex,
      int firstLevel, int secondLevel,
      String title, String afterTitle,
      boolean multiline) {
    super(beginIndex, endIndex);
    this.firstLevel = firstLevel;
    this.secondLevel = secondLevel;
    this.titleNotTrimmed = title;
    this.title = (title != null) ? title.trim() : null;
    this.afterTitleNotTrimmed = afterTitle;
    this.multiline = multiline;
  }

  /* (non-Javadoc)
   * @see java.lang.Object#toString()
   */
  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    appendEquals(sb, firstLevel);
    if (titleNotTrimmed != null) {
      sb.append(titleNotTrimmed);
    }
    appendEquals(sb, secondLevel);
    if (afterTitleNotTrimmed != null) {
      sb.append(afterTitleNotTrimmed);
    }
    return sb.toString();
  }

  /**
   * Create a title with exactly one space between the "=" and the trimmed title text.
   *
   * @param level Title level.
   * @param title Title text.
   * @param after Extra text after title.
   * @return Textual representation of the title.
   */
  public static String createTitle(int level, String title, String after) {
    StringBuilder sb = new StringBuilder();
    appendEquals(sb, level);
    sb.append(' ');
    if (title != null) {
      sb.append(title.trim());
      sb.append(' ');
    }
    appendEquals(sb, level);
    appendAfterTitle(sb, after);
    return sb.toString();
  }

  /**
   * Create a title keeping the title text as it is (no trimming, no extra space).
   *
   * @param level Title level.
   * @param title Title text.
   * @param after Extra text after title.
   * @return Textual representation of the title.
   */
  public static String createUntrimmedTitle(int level, String title, String after) {
    StringBuilder sb = new StringBuilder();
    appendEquals(sb, level);
    if (title != null) {
      sb.append(title);
    }
    appendEquals(sb, level);
    appendAfterTitle(sb, after);
    return sb.toString();
  }

  /**
   * Append a run of "=" to a buffer.
   *
   * @param sb Buffer.
   * @param count Number of "=" to append (nothing is appended if not positive).
   */
  private static void appendEquals(StringBuilder sb, int count) {
    for (int i = 0; i < count; i++) {
      sb.append('=');
    }
  }

  /**
   * Append the text after the title to a buffer: a space followed by the
   * trimmed text, or nothing if the text is <code>null</code> or blank.
   *
   * @param sb Buffer.
   * @param after Extra text after title.
   */
  private static void appendAfterTitle(StringBuilder sb, String after) {
    if (after == null) {
      return;
    }
    String trimmed = after.trim();
    if (trimmed.length() > 0) {
      sb.append(' ');
      sb.append(trimmed);
    }
  }
}