/* WikiText.java - wiki text is a text, where some words are wikified.
*
* Copyright (c) 2009 Andrew Krizhanovsky <andrew.krizhanovsky at gmail.com>
* Distributed under EPL/LGPL/GPL/AL/BSD multi-license.
*/
package wikokit.base.wikt.util;
import java.util.regex.Pattern;
//import java.util.regex.Matcher;
import java.util.List;
import java.util.ArrayList;
/** WikiText is a text, where [[some]] [[word]]s [[be|are]] [[wikify|wikified]],
 * e.g. "[[little]] [[bell]]".
 *
 * An object keeps the visible text, the source wikified text (or NULL, if
 * the text has no wikification) and the array of wiki words (internal links).
 * Objects are created by the static factory methods createOnePhrase,
 * createSplitByComma, createSplitBySemicolon and createWithoutParsing,
 * or directly by the constructor.
 */
public class WikiText {

    /** Visible text, e.g. "bullets m." for "[[bullet]]s {{m}}" */
    private final String text;

    /** Source wikified text, e.g. "[[bullet]]s {{m}}". It is NULL if "text" hasn't any wikification. */
    private final String wikified_text;

    /** Wiki internal links, e.g. "bullet" and "bullets" for "[[bullet]]s {{m}}".
     * Not final, since free() resets it to NULL. */
    private WikiWord[] wiki_words;

    /** Shared empty result; a zero-length array has no elements to modify, so it is safe to share. */
    private final static WikiText[] NULL_WIKITEXT_ARRAY = new WikiText[0];

    /** Split by comma and semicolon */
    private final static Pattern ptrn_comma_semicolon = Pattern.compile(
            "[,;]+");

    /** Split by semicolon */
    private final static Pattern ptrn_semicolon = Pattern.compile(
            "[;]+");

    /** Stores the given values as they are (the array is not copied).
     *
     * @param _text visible text
     * @param _wikified_text source wikified text, or NULL if there is no wikification
     * @param _wiki_words internal links found in the text
     */
    public WikiText(String _text, String _wikified_text, WikiWord[] _wiki_words) {
        text = _text;
        wikified_text = _wikified_text;
        wiki_words = _wiki_words;
    }

    /** Gets visible text, e.g. "bullets m." for "[[bullet]]s {{m}}" */
    public String getVisibleText() {
        return text;
    }

    /** Gets source wikified text, it is NULL if the visible text hasn't any wikification. */
    public String getWikifiedText() {
        return wikified_text;
    }

    /** Gets array of internal links (wiki words, i.e. words with hyperlinks).
     * The internal array itself is returned (not a copy); it is NULL after free().
     */
    public WikiWord[] getWikiWords() {
        return wiki_words;
    }

    /** Frees memory: clears every element of the array of wiki words
     * and drops the reference to the array itself.
     */
    public void free ()
    {
        if(null != wiki_words) {
            for(int i=0; i<wiki_words.length; i++)
                wiki_words[i] = null;
            wiki_words = null;
        }
    }

    /** Parses text, creates array of wiki words (words with hyperlinks),
     * e.g. text is "[[little]] [[bell]]", wiki_words[]={"little", "bell"}
     * This function should be used for definitions / meanings.
     *
     * @param page_title title of the page, it is passed to the WikiWord parser
     * @param _wikified_text source text, it is trimmed before parsing
     * @return NULL if there is no text (NULL, empty or only whitespace).
     */
    public static WikiText createOnePhrase(String page_title, String _wikified_text)
    {
        if(null == _wikified_text) {
            return null;    // no text at all, the same result as for an empty string
        }
        _wikified_text = _wikified_text.trim();
        if(0 == _wikified_text.length()) {
            return null;
        }

        // WikiWord works with StringBuffer; the order of the two calls is kept as is
        StringBuffer sb = new StringBuffer(_wikified_text);
        String s = WikiWord.parseDoubleBrackets(page_title, sb).toString();
        WikiWord[] ww = WikiWord.getWikiWords(page_title, sb);

        // wikified text is NULL if "text" hasn't any wikification;
        // "no wikification" is detected by the unchanged length of the text
        if(s.length() == _wikified_text.length())
            _wikified_text = null;

        return new WikiText(s, _wikified_text, ww);
    }

    /** Parses text (split by commas and semicolons), creates array of wiki words (words with hyperlinks),
     * e.g. text is "[[little]] [[bell]], [[handbell]], [[doorbell]]".
     * This function should be used to split wikified list of synonyms and translations.
     *
     * todo: split should take into account brackets,
     * e.g. "bread (new, old), butter" -> "bread (new, old)", "butter"
     *
     * @param page_title title of the page, it is passed to createOnePhrase
     * @param text wikified text to be split
     * @return empty array if there is no text.
     */
    public static WikiText[] createSplitByComma(String page_title, String text)
    {
        return splitAndParse(ptrn_comma_semicolon, page_title, text);
    }

    /** Parses text (split by semicolons), creates array of wiki text fragments
     * This function should be used to split wikified list of synonyms and translations.
     *
     * @param page_title title of the page, it is passed to createOnePhrase
     * @param text wikified text to be split
     * @return empty array if there is no text.
     */
    public static WikiText[] createSplitBySemicolon(String page_title, String text)
    {
        return splitAndParse(ptrn_semicolon, page_title, text);
    }

    /** Splits text by the delimiter and parses each fragment by createOnePhrase,
     * empty fragments are skipped.
     *
     * @param delimiter pattern which separates the fragments
     * @return empty array if there is no text (NULL, empty or only delimiters and whitespace).
     */
    private static WikiText[] splitAndParse(Pattern delimiter, String page_title, String text)
    {
        if(null == text || 0 == text.trim().length()) {
            return NULL_WIKITEXT_ARRAY;
        }

        String[] fragments = delimiter.split(text);

        List<WikiText> wt_list = new ArrayList<WikiText>(fragments.length);
        for(String fragment : fragments) {
            // createOnePhrase trims the fragment and returns NULL for an empty one
            WikiText wt = createOnePhrase(page_title, fragment);
            if(null != wt) {
                wt_list.add(wt);
            }
        }

        if(wt_list.isEmpty()) {
            return NULL_WIKITEXT_ARRAY;
        }
        // the list is not empty here, so toArray allocates a new array of the list size
        return wt_list.toArray(NULL_WIKITEXT_ARRAY);
    }

    /** Creates array of wiki words (words with hyperlinks) without any parsing.
     * Each word w yields one WikiText with the visible text w,
     * the wikified text "[[w]]" and exactly one wiki word.
     *
     * @param page_title title of the page (it is not used by this method)
     * @param wikified_words words which are already without [[wikification]],
     *          e.g. translation extracted from {{t|lang_code|wiki_word}}
     * @return empty array if there is no text (the list is NULL or empty).
     */
    public static WikiText[] createWithoutParsing(String page_title,
                                    List<String> wikified_words)
    {
        if(null == wikified_words || wikified_words.isEmpty())
            return NULL_WIKITEXT_ARRAY;

        WikiText[] wt = new WikiText[wikified_words.size()];

        int i=0;
        for(String w : wikified_words) {
            WikiWord[] ww_array1 = new WikiWord[1];
            ww_array1[0] = new WikiWord(w, w, null);

            // todo: check "[["+w+"]]" - is it necessary?
            wt[i++] = new WikiText(w, "[["+w+"]]", ww_array1);
        }
        return wt;
    }

    /** Compares two wiki texts ignoring case.
     *
     * Two NULL objects are equal. If both visible texts are NULL, then the
     * objects are considered equal and the wikified texts are not compared.
     *
     * @return true if objects are equal (the same texts and the same wikified texts).
     */
    static public boolean equals (WikiText one, WikiText two) {

        if (null == one && null == two)
            return true;
        if (null == one || null == two)
            return false;

        String text1 = one.text;
        String text2 = two.text;
        if (null == text1 && null == text2)
            return true;
        if (null == text1 || null == text2)
            return false;

        if (!text1.equalsIgnoreCase(text2))
            return false;

        // wikified text is NULL if "text" hasn't any wikification
        String w1 = one.wikified_text;
        String w2 = two.wikified_text;
        if (null == w1 && null == w2)
            return true;
        if (null == w1 || null == w2)
            return false;

        return w1.equalsIgnoreCase(w2);
    }
}
// todo: boolean isOneWord (translation)
//
// Returns true if the wiki text corresponds to exactly one word
// (this is important for translations), e.g.
//      "[[little]] [[bell]]" -> false;
//      "[[doorbell]]"        -> true
// ...