/* ReferenceParser.java - parser of wiki references <ref>...</ref>
*
* Copyright (c) 2005-2008 Andrew Krizhanovsky /aka at mail.iias.spb.su/
* Distributed under GNU Public License.
*/
package wikokit.base.wikipedia.text;
import wikokit.base.wikipedia.util.StringUtil;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
/**
 * Parser of wiki references {@code <ref>...</ref>}.
 *
 * <p>All methods are static and the class keeps no mutable state between
 * calls. Only the plain opening tag {@code <ref>} is recognised (not tags
 * with attributes, e.g. {@code <ref name="x">}), and because {@code .} does
 * not match line terminators by default, a reference is recognised only when
 * its text does not contain a line break.
 */
public class ReferenceParser {

    /** Matches one reference; group(1) is the text between the tags. */
    private static final Pattern REF_BOUNDARIES = Pattern.compile("<ref>(.+?)</ref>");

    /**
     * Matches a URL starting with "http://" up to and including the next
     * whitespace character, or up to the end of the input.
     */
    private static final Pattern HTTP_URL = Pattern.compile("\\bhttp://.+?(\\s|$)");

    /**
     * Removes URLs like http://... from the text.
     *
     * @param text source text, may be {@code null}
     * @return a new buffer with every match of the URL pattern deleted
     *         (the whitespace character that terminates a URL is deleted
     *         together with it); a new empty buffer if {@code text} is
     *         {@code null} or empty
     */
    private static StringBuffer removeHTTPURL(StringBuffer text) {
        if (null == text || 0 == text.length()) {
            // A fresh buffer per call: StringBuffer is mutable, so handing out
            // a shared "empty" instance would let one caller corrupt the
            // result seen by all others.
            return new StringBuffer();
        }
        return new StringBuffer(HTTP_URL.matcher(text.toString()).replaceAll(""));
    }

    /**
     * Expands the text of each reference and moves it to the end of the text.
     *
     * <p>Every {@code <ref>...</ref>} is cut out of its original position.
     * Its inner text is passed through
     * {@code StringUtil.escapeCharDollarAndBackslash} and
     * {@code WikiParser.parseCurlyBrackets}, http:// URLs are stripped, and
     * the results are concatenated (without any separator) and appended to
     * the remaining text after a blank line ({@code "\n\n"}).
     *
     * <p>According to the original documentation, if the reference contains
     * a template, e.g. {@code <ref>{{cite book |..</ref>}, the whole
     * reference is deleted. NOTE(review): this depends on the behaviour of
     * {@code WikiParser.parseCurlyBrackets}, which is not visible here.
     *
     * @param text wiki text, may be {@code null}
     * @return a new empty buffer if {@code text} is {@code null} or empty;
     *         the very same {@code text} object if it contains no reference;
     *         otherwise a new buffer with the references moved to the end
     */
    public static StringBuffer expandMoveToEndOfText(StringBuffer text) {
        if (null == text || 0 == text.length()) {
            return new StringBuffer(); // never share a mutable empty buffer
        }

        Matcher m = REF_BOUNDARIES.matcher(text.toString());
        if (!m.find()) {
            return text; // nothing to move, the input is returned as is
        }

        StringBuffer result = new StringBuffer();    // text without references
        StringBuffer movedRefs = new StringBuffer(); // expanded reference texts

        do {
            // group(1) := text within <ref>reference boundaries</ref>
            StringBuffer expanded = WikiParser.parseCurlyBrackets(
                    StringUtil.escapeCharDollarAndBackslash(m.group(1)));
            movedRefs.append(removeHTTPURL(expanded));

            // Copies the text preceding the match and drops the match itself.
            m.appendReplacement(result, "");
        } while (m.find());
        m.appendTail(result);

        // The blank-line separator is added only if some reference text survived.
        if (movedRefs.length() > 0) {
            result.append("\n\n");
            result.append(movedRefs);
        }
        return result;
    }

    /**
     * Removes references from the text.
     *
     * @param text wiki text, may be {@code null}
     * @return a new buffer with every {@code <ref>...</ref>} deleted together
     *         with its content; a new empty buffer if {@code text} is
     *         {@code null} or empty
     */
    public static StringBuffer removeReferences(StringBuffer text) {
        if (null == text || 0 == text.length()) {
            return new StringBuffer(); // never share a mutable empty buffer
        }
        return new StringBuffer(REF_BOUNDARIES.matcher(text.toString()).replaceAll(""));
    }
}