/*
* WPCleaner: A tool to help on Wikipedia maintenance tasks.
* Copyright (C) 2013 Nicolas Vervelle
*
* See README.txt file for licensing information.
*/
package org.wikipediacleaner.api.data;
import java.util.List;
import org.wikipediacleaner.api.constants.EnumWikipedia;
/**
* Class containing information about a language link ([[lang:link|text]]).
*/
public class PageElementLanguageLink extends PageElement {
private final String languageNotTrimmed;
private final String language;
private final String linkNotTrimmed;
private final String link;
private final String textNotTrimmed;
private final String text;
/**
* Analyze contents to check if it matches a language link.
*
* @param wikipedia Wikipedia.
* @param contents Contents.
* @param index Block start index.
* @return Block details it there's a block.
*/
public static PageElementLanguageLink analyzeBlock(
EnumWikipedia wikipedia, String contents, int index) {
// Verify arguments
if (contents == null) {
return null;
}
// Look for '[['
int tmpIndex = index;
if ((tmpIndex >= contents.length()) ||
(!contents.startsWith("[[", tmpIndex))) {
return null;
}
tmpIndex += 2;
int beginIndex = tmpIndex;
// Possible whitespaces characters
while ((tmpIndex < contents.length()) && (contents.charAt(tmpIndex) == ' ')) {
tmpIndex++;
}
// Search for :
while ((tmpIndex < contents.length()) &&
(contents.charAt(tmpIndex) != ':') &&
(contents.charAt(tmpIndex) != '|') &&
(contents.charAt(tmpIndex) != ']') &&
(contents.charAt(tmpIndex) != '[')) {
tmpIndex++;
}
if ((tmpIndex >= contents.length()) || (contents.charAt(tmpIndex) != ':')) {
return null;
}
// Check that namespace is language
int colonIndex = tmpIndex;
List<Language> languages = wikipedia.getWikiConfiguration().getLanguages();
if (!Language.isLanguageCode(languages, contents.substring(beginIndex, colonIndex).trim())) {
return null;
}
// Search for |
while ((tmpIndex < contents.length()) &&
(contents.charAt(tmpIndex) != '|') &&
(contents.charAt(tmpIndex) != ']')) {
tmpIndex++;
}
if (tmpIndex >= contents.length()) {
return null;
}
// Simple language tag [[lang:link]]
if (contents.charAt(tmpIndex) == ']') {
if (!contents.startsWith("]]", tmpIndex)) {
return null;
}
return new PageElementLanguageLink(
index, tmpIndex + 2,
contents.substring(beginIndex, colonIndex),
contents.substring(colonIndex + 1, tmpIndex),
null);
}
// Find elements of image
int endIndex = contents.indexOf("]]", colonIndex);
if (endIndex < 0) {
return null;
}
return new PageElementLanguageLink(
index, endIndex + 2,
contents.substring(beginIndex, colonIndex),
contents.substring(colonIndex + 1, tmpIndex),
contents.substring(tmpIndex + 1, endIndex));
}
public String getLanguage() {
return language;
}
public String getLink() {
return link;
}
public String getText() {
return text;
}
private PageElementLanguageLink(
int beginIndex, int endIndex,
String language, String link,
String text) {
super(beginIndex, endIndex);
this.languageNotTrimmed = language;
this.language = (language != null) ? language.trim() : null;
this.linkNotTrimmed = link;
this.link = (link != null) ? link.trim() : null;
this.textNotTrimmed = text;
this.text = (text != null) ? text.trim() : null;
}
/* (non-Javadoc)
* @see java.lang.Object#toString()
*/
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("[[");
sb.append(languageNotTrimmed);
sb.append(':');
if (linkNotTrimmed != null) {
sb.append(linkNotTrimmed);
}
if (textNotTrimmed != null) {
sb.append('|');
sb.append(textNotTrimmed);
}
sb.append("]]");
return sb.toString();
}
}