/* TableParser - parser of wiki tables {| .. |}.
*
* Copyright (c) 2005-2008 Andrew Krizhanovsky /aka at mail.iias.spb.su/
* Distributed under GNU Public License.
*/
package wikokit.base.wikipedia.text;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
/** Parser of wiki tables {| .. |}.
*/
public class TableParser {
private final static StringBuffer NULL_STRINGBUFFER = new StringBuffer("");
/*private final static Pattern ptrn_table_boundary = Pattern.compile("{|(.+?)|}", Pattern.DOTALL);
private final static Pattern ptrn_left_table_boundary = Pattern.compile("{|");
private final static Pattern ptrn_right_table_boundary = Pattern.compile("|}");*/
//private final static Pattern ptrn_table_boundaries = Pattern.compile("{\\||\\|}");
private final static Pattern ptrn_table_boundaries = Pattern.compile("\\{\\||\\|\\}");
/** Removes tables, and embedded tables also, e.g.
* "{| table 1 \n {| A table in the table 1 \n|}|}".
*
* Remark: if this func is before CurlyBrackets () then it generates
* warnings, since end of infobox (template) {{ |}} looks like end of table.
*/
public static StringBuffer removeWikiTables(StringBuffer text)
{
final String w_closed_too_many = "Warning (wikipedia.text.TableParser.removeWikiTables()): number of opened brackets '{|' < than closed brackets '|}'";
final String w_opened_too_many = "Warning (wikipedia.text.TableParser.removeWikiTables()): number of opened brackets '{|' > than closed brackets '|}'";
if(null == text || 0 == text.length()) {
return NULL_STRINGBUFFER;
}
Matcher m = ptrn_table_boundaries.matcher(text.toString());
boolean result = m.find();
if(result) {
StringBuffer sb = new StringBuffer();
int n_nested = 0;
while(result) {
String g0 = m.group(0);
if('{' == g0.charAt(0)) {
if(0 == n_nested) { // first opened bracket
m.appendReplacement(sb, "");
}
n_nested ++;
} else {
n_nested --;
if(n_nested == 0) {
StringBuffer temp = new StringBuffer();
m.appendReplacement(temp, ""); // I don't know why this line is important!
}
if(n_nested < 0) {
System.out.println(w_closed_too_many);
}
}
result = m.find();
}
m.appendTail(sb);
if(n_nested < 0) {
System.out.println(w_closed_too_many);
} else {
if(n_nested > 0) {
System.out.println(w_opened_too_many);
}
}
return sb;
}
return text;
}
}