package info.bliki.wiki.filter;
import info.bliki.htmlcleaner.ContentToken;
import info.bliki.htmlcleaner.EndTagToken;
import info.bliki.htmlcleaner.TagNode;
import info.bliki.htmlcleaner.TagToken;
import info.bliki.wiki.model.Configuration;
import info.bliki.wiki.model.DefaultEventListener;
import info.bliki.wiki.model.IEventListener;
import info.bliki.wiki.model.IWikiModel;
import info.bliki.wiki.tags.HTMLBlockTag;
import info.bliki.wiki.tags.HTMLTag;
import info.bliki.wiki.tags.WPBoldItalicTag;
import info.bliki.wiki.tags.WPTag;
import info.bliki.wiki.tags.util.Attribute;
import info.bliki.wiki.tags.util.IBodyTag;
import info.bliki.wiki.tags.util.INoBodyParsingTag;
import info.bliki.wiki.tags.util.NodeAttribute;
import info.bliki.wiki.tags.util.TagStack;
import info.bliki.wiki.tags.util.WikiTagNode;
import java.util.List;
/**
* A Wikipedia syntax parser for parsing in wiki preformatted blocks (rendered
* as <pre>...</pre>)
*
*/
public class WikipediaPreTagParser extends AbstractParser {
/**
* Enable HTML tags
*/
private final boolean fHtmlCodes = true;
private IEventListener fEventListener = null;
/**
 * Creates a parser for the given raw wiki text, using the default event
 * listener ({@link DefaultEventListener#CONST}).
 *
 * @param stringSource the raw wiki text to parse
 */
public WikipediaPreTagParser(String stringSource) {
    this(stringSource, null);
}
/**
 * Creates a parser for the given raw wiki text.
 *
 * @param stringSource the raw wiki text to parse
 * @param wikiListener the listener notified about parse events; when
 *        <code>null</code>, {@link DefaultEventListener#CONST} is used
 */
public WikipediaPreTagParser(String stringSource, IEventListener wikiListener) {
    super(stringSource);
    // fall back to the shared default listener when none was supplied
    fEventListener = (wikiListener == null) ? DefaultEventListener.CONST : wikiListener;
}
/**
 * Scans the wiki source and returns the next parser token. Inside a
 * preformatted block only a restricted markup set is recognized: wiki links
 * (<code>[[..]]</code>), bold/italic apostrophe markup, underline/deleted-line
 * markers (<code>%%</code> / <code>%%%</code>) and — if enabled — inline HTML
 * tags and HTML comments.
 *
 * @return one of the <code>Token*</code> constants; <code>TokenEOF</code>
 *         when the end of the source is reached
 */
public int getNextToken() // throws InvalidInputException
{
    fWhiteStart = true;
    fWhiteStartPosition = fCurrentPosition;
    try {
        while (true) {
            fCurrentCharacter = fSource[fCurrentPosition++];
            // check at the end of line, if there is open wiki bold or italic
            // markup
            switch (fCurrentCharacter) {
            case '\n':
                reduceTokenStackBoldItalic();
                break;
            }
            // ---------Identify the next token-------------
            switch (fCurrentCharacter) {
            case '[':
                if (parseWikiLink()) {
                    continue;
                }
                break;
            case '\'':
                // count consecutive apostrophes:
                // 2 -> italic, 3 -> bold, 4 -> bold (one pushed back), 5 -> bold+italic
                if (getNextChar('\'')) {
                    if (getNextChar('\'')) {
                        if (getNextChar('\'')) {
                            if (getNextChar('\'')) {
                                createContentToken(5);
                                return TokenBOLDITALIC;
                            }
                            // exactly 4 apostrophes: treat as bold, re-scan the last one
                            fCurrentPosition -= 1;
                            fWhiteStart = true;
                            createContentToken(3);
                            return TokenBOLD;
                        }
                        createContentToken(3);
                        return TokenBOLD;
                    }
                    createContentToken(2);
                    return TokenITALIC;
                }
                break;
            case '%':
                // "%%%" -> deleted line, "%%" -> underline
                if (getNextChar('%')) {
                    if (getNextChar('%')) {
                        createContentToken(3);
                        return TokenDELETEDLINE;
                    }
                    createContentToken(2);
                    return TokenUNDERLNE;
                }
                break;
            case '<':
                if (fHtmlCodes) {
                    int htmlStartPosition = fCurrentPosition;
                    // HTML tags are allowed
                    try {
                        switch (fStringSource.charAt(fCurrentPosition)) {
                        case '!': // <!-- HTML comment -->
                            if (parseHTMLCommentTags()) {
                                continue;
                            }
                            break;
                        default:
                            if (fSource[fCurrentPosition] != '/') {
                                // opening HTML tag
                                WikiTagNode tagNode = parseTag(fCurrentPosition);
                                if (tagNode != null) {
                                    String tagName = tagNode.getTagName();
                                    TagToken tag = fWikiModel.getTokenMap().get(tagName);
                                    if ((tag != null) && !(tag instanceof HTMLBlockTag)) {
                                        // clone the prototype from the token map before mutating it
                                        tag = (TagToken) tag.clone();
                                        if (tag instanceof TagNode) {
                                            TagNode node = (TagNode) tag;
                                            List<NodeAttribute> attributes =
                                                tagNode.getAttributesEx();
                                            Attribute attr;
                                            // NOTE(review): iteration starts at index 1 — the first
                                            // entry is apparently not a real attribute; confirm
                                            // against WikiTagNode.getAttributesEx()
                                            for (int i = 1; i < attributes.size(); i++) {
                                                attr = attributes.get(i);
                                                node.addAttribute(
                                                    attr.getName(),
                                                    attr.getValue(),
                                                    true);
                                            }
                                        }
                                        if (tag instanceof HTMLTag) {
                                            // ((HTMLTag) tag).setTemplate(isTemplate());
                                        }
                                        createContentToken(1);
                                        fCurrentPosition = fScannerPosition;
                                        String allowedParents = tag.getParents();
                                        if (allowedParents != null) {
                                            reduceTokenStack(tag);
                                        }
                                        createTag(tag, tagNode, tagNode.getEndPosition());
                                        return TokenIgnore;
                                    }
                                    break;
                                }
                            } else {
                                // closing HTML tag
                                WikiTagNode tagNode = parseTag(++fCurrentPosition);
                                if (tagNode != null) {
                                    String tagName = tagNode.getTagName();
                                    TagToken tag = fWikiModel.getTokenMap().get(tagName);
                                    if ((tag != null) && !(tag instanceof HTMLBlockTag)) {
                                        createContentToken(2);
                                        fCurrentPosition = fScannerPosition;
                                        if (fWikiModel.stackSize() > 0) {
                                            TagToken topToken = fWikiModel.peekNode();
                                            if (topToken.getName().equals(tag.getName())) {
                                                fWikiModel.popNode();
                                                return TokenIgnore;
                                            } else {
                                                // mismatched closing tag: unwind the stack if allowed
                                                if (tag.isReduceTokenStack()) {
                                                    reduceStackUntilToken(tag);
                                                }
                                            }
                                        } else {
                                            // stack empty: nothing to close
                                        }
                                        return TokenIgnore;
                                    }
                                    break;
                                }
                            }
                        }
                    } catch (IndexOutOfBoundsException e) {
                        // do nothing
                    }
                    // not a recognized tag: rewind and treat '<' as plain content
                    fCurrentPosition = htmlStartPosition;
                }
                break;
            default:
                // ISBN / URI-scheme / CamelCase link parsing is disabled inside
                // preformatted blocks (the corresponding code was removed here)
            }
            if (!fWhiteStart) {
                fWhiteStart = true;
                fWhiteStartPosition = fCurrentPosition - 1;
            }
        }
        // -----------------end switch while try--------------------
    } catch (IndexOutOfBoundsException e) {
        // end of scanner text
    }
    try {
        // flush any remaining plain text as a content token
        createContentToken(1);
    } catch (IndexOutOfBoundsException e) {
        // end of scanner text
    }
    return TokenEOF;
}
/**
 * Tries to consume an HTML comment (<code>&lt;!-- ... --&gt;</code>) at the
 * current scanner position (which points at the <code>'!'</code> right behind
 * the already-consumed <code>'&lt;'</code>).
 *
 * @return <code>true</code> if a complete comment was consumed and emitted as
 *         plain content; <code>false</code> if the text is not a comment or
 *         the closing <code>--&gt;</code> is missing
 */
private boolean parseHTMLCommentTags() {
    int htmlStartPosition = fCurrentPosition;
    // check the 4-character prefix starting at the '<' one position back
    String htmlCommentString =
        fStringSource.substring(fCurrentPosition - 1, fCurrentPosition + 3);
    if (htmlCommentString.equals("<!--")) {
        fCurrentPosition += 3;
        if (readUntil("-->")) {
            // substring() never returns null, so the original content null-check
            // was dead code; emit the whole comment (delimiters included)
            createContentToken(fCurrentPosition - htmlStartPosition + 1);
            return true;
        }
    }
    return false;
}
/**
 * Parse a wiki section starting with a '[' character.
 *
 * @return <code>true</code> if a correct link (wiki link or bbcode start tag)
 *         was found; <code>false</code> otherwise (the scanner is rewound)
 */
private boolean parseWikiLink() {
    int startLinkPosition = fCurrentPosition;
    if (getNextChar('[')) {
        // "[[" - internal wiki link syntax
        return parseWikiTag();
    } else {
        createContentToken(1);
        fWhiteStart = false;
        if (readUntilCharOrStopAtEOL(']')) {
            String name =
                fStringSource.substring(startLinkPosition, fCurrentPosition - 1);
            // bbcode start
            if (fWikiModel.parseBBCodes() && name.length() > 0) {
                // parse start tokens like phpBB forum syntax style (bbcode)
                char ch = name.charAt(0);
                if ('a' <= ch && ch <= 'z') {
                    // first character must be a letter
                    StringBuilder bbCode = new StringBuilder(name.length());
                    bbCode.append(ch);
                    if (parsePHPBBCode(name, bbCode)) {
                        return true;
                    }
                }
            }
            // bbcode end
        }
        // nothing recognized: rewind so the '[' is handled as plain text
        fCurrentPosition = startLinkPosition;
    }
    return false;
}
/**
 * Parse a wiki section starting with a '[[' sequence.
 *
 * @return <code>true</code> if a correct link was found
 * @throws InvalidPreWikiTag if the wiki model refuses to render the link
 *         inside a preformatted block (probably a special image link)
 */
private boolean parseWikiTag() {
    int startLinkPosition = fCurrentPosition;
    int endLinkPosition;
    // wikipedia link style
    createContentToken(2);
    int temp = fCurrentPosition;
    if (findWikiLinkEnd()) {
        endLinkPosition = fCurrentPosition - 2;
        String name = fStringSource.substring(startLinkPosition, endLinkPosition);
        // test for a suffix string behind the Wiki link. Useful for plurals.
        // Example:
        // Dolphins are [[aquatic mammal]]s that are closely related to [[whale]]s
        // and [[porpoise]]s.
        temp = fCurrentPosition;
        String suffix = "";
        try {
            fCurrentCharacter = fSource[fCurrentPosition];
            if (Character.isLowerCase(fCurrentCharacter)) {
                fCurrentPosition++;
                StringBuilder suffixBuffer = new StringBuilder(16);
                suffixBuffer.append(fCurrentCharacter);
                // collect all following lower-case letters as the link suffix
                while (true) {
                    fCurrentCharacter = fSource[fCurrentPosition++];
                    if (!Character.isLowerCase(fCurrentCharacter)) {
                        fCurrentPosition--;
                        break;
                    }
                    suffixBuffer.append(fCurrentCharacter);
                }
                suffix = suffixBuffer.toString();
            }
        } catch (IndexOutOfBoundsException e) {
            // suffix ran past the end of the source: reset behind the link
            fCurrentPosition = temp;
        }
        fEventListener.onWikiLink(
            fSource,
            startLinkPosition,
            endLinkPosition,
            suffix);
        if (!fWikiModel.appendRawWikipediaLink(name, suffix)) {
            fCurrentPosition = temp;
            // this is probably a special image link
            throw new InvalidPreWikiTag("parseWikiTag");
        }
        return true;
    } else {
        // no closing "]]" found: restore scanner state, "[[" stays plain text
        fWhiteStart = true;
        fWhiteStartPosition = startLinkPosition - 2;
        fCurrentPosition = temp + 1;
    }
    return false;
}
/**
 * Extracts the body text for a body-carrying tag (everything up to the
 * matching closing tag, case-insensitively), advances the scanner behind it,
 * and delegates to {@link #handleTag(TagToken, WikiTagNode, String)}.
 *
 * @param tag the (cloned) tag token from the wiki model's token map
 * @param tagNode the parsed opening tag
 * @param startMacroPosition offset right behind the opening tag
 */
private void createTag(TagToken tag, WikiTagNode tagNode,
    int startMacroPosition) {
    String endTag;
    String macroBodyString = "";
    int index0;
    String command = tagNode.getTagName();
    if ((tag != null)
        && (tag instanceof IBodyTag)
        && (!tagNode.isEmptyXmlTag())) {
        endTag = command + '>';
        // locate "</" + tagName + ">" starting behind the opening tag
        index0 =
            Util.indexOfIgnoreCase(fStringSource, "</", endTag, startMacroPosition);
        if (index0 >= 0) {
            macroBodyString = fStringSource.substring(startMacroPosition, index0);
            // skip behind the closing tag: "</" (2 chars) + endTag
            fCurrentPosition = index0 + endTag.length() + 2;
        } else {
            // no closing tag found: the body runs to the end of the source
            macroBodyString =
                fStringSource.substring(startMacroPosition, fSource.length);
            fCurrentPosition = fSource.length;
        }
    } else {
        // empty-XML or non-body tag: no body to extract
        macroBodyString = null;
        fCurrentPosition = startMacroPosition;
    }
    handleTag(tag, tagNode, macroBodyString);
}
/**
 * Appends the given tag (and, for body tags, its recursively parsed body) to
 * the wiki model. Any failure is rendered into the output as a
 * <code>&lt;div class="error"&gt;</code> node instead of aborting the parse.
 *
 * @param tag the tag token to append/push; end tags are appended directly
 * @param tagNode the parsed opening tag (used for the tag name in messages)
 * @param bodyString the raw body text, or <code>null</code> for body-less tags
 */
private void handleTag(TagToken tag, WikiTagNode tagNode, String bodyString) {
    String command = tagNode.getTagName();
    try {
        if (tag instanceof EndTagToken) {
            fWikiModel.append(tag);
        } else {
            fWikiModel.pushNode(tag);
            if (null != bodyString) {
                if (tag instanceof INoBodyParsingTag) {
                    // keep the body as raw text, no recursive wiki parsing
                    ((TagNode) tag).addChild(new ContentToken(bodyString));
                } else {
                    // recursively filter tags within the tags body string
                    WikipediaPreTagParser.parseRecursive(
                        bodyString.trim(),
                        fWikiModel,
                        false,
                        true);
                }
            }
            if (tag instanceof IBodyTag) {
                fWikiModel.popNode();
            }
        }
    } catch (IllegalArgumentException e) {
        TagNode divTagNode = new TagNode("div");
        divTagNode.addAttribute("class", "error", true);
        divTagNode.addChild(new ContentToken("IllegalArgumentException: "
            + command
            + " - "
            + e.getMessage()));
        fWikiModel.append(divTagNode);
        e.printStackTrace();
    } catch (Throwable e) {
        // fixed: the stack trace was printed twice in the original code
        e.printStackTrace();
        TagNode divTagNode = new TagNode("div");
        divTagNode.addAttribute("class", "error", true);
        divTagNode.addChild(new ContentToken(command + ": " + e.getMessage()));
        fWikiModel.append(divTagNode);
    }
}
/**
 * Main parse loop: pulls tokens from {@link #getNextToken()} and maintains
 * the wiki model's open-node stack for bold/italic/underline/deleted-line
 * markup until <code>TokenEOF</code>, then closes any remaining open nodes.
 */
@Override
public void runParser() {
    int token; // the TokenSTART initializer in the original was a dead store
    while ((token = getNextToken()) != TokenEOF) {
        switch (token) {
        case TokenBOLDITALIC:
            handleBoldItalicToken();
            break;
        case TokenBOLD:
            handleBoldToken();
            break;
        case TokenITALIC:
            handleItalicToken();
            break;
        case TokenUNDERLNE:
            togglePhysicalTag(UNDERLINE);
            break;
        case TokenDELETEDLINE:
            togglePhysicalTag(DELETEDLINE);
            break;
        }
    }
    reduceTokenStack();
}

/** Handles a ''''' token: closes matching open bold/italic nodes or opens a combined bold-italic node. */
private void handleBoldItalicToken() {
    if (fWikiModel.stackSize() > 0
        && fWikiModel.peekNode().equals(BOLDITALIC)) {
        fWikiModel.popNode();
    } else if (fWikiModel.stackSize() > 1
        && fWikiModel.peekNode().equals(BOLD)
        && fWikiModel.getNode(fWikiModel.stackSize() - 2).equals(ITALIC)) {
        // close both "italic > bold" nodes
        fWikiModel.popNode();
        fWikiModel.popNode();
    } else if (fWikiModel.stackSize() > 1
        && fWikiModel.peekNode().equals(ITALIC)
        && fWikiModel.getNode(fWikiModel.stackSize() - 2).equals(BOLD)) {
        // close both "bold > italic" nodes
        fWikiModel.popNode();
        fWikiModel.popNode();
    } else if (fWikiModel.stackSize() > 0
        && fWikiModel.peekNode().equals(BOLD)) {
        // close bold, the remaining two quotes open italic
        fWikiModel.popNode();
        fWikiModel.pushNode(new WPTag("i"));
    } else if (fWikiModel.stackSize() > 0
        && fWikiModel.peekNode().equals(ITALIC)) {
        // close italic, the remaining three quotes open bold
        fWikiModel.popNode();
        fWikiModel.pushNode(new WPTag("b"));
    } else {
        fWikiModel.pushNode(new WPBoldItalicTag());
    }
}

/** Handles a ''' token: closes an open bold(-italic) node or opens a new bold node. */
private void handleBoldToken() {
    if (fWikiModel.stackSize() > 0
        && fWikiModel.peekNode().equals(BOLDITALIC)) {
        // bold part of bold-italic ends; italic stays open
        fWikiModel.popNode();
        fWikiModel.pushNode(new WPTag("i"));
    } else if (fWikiModel.stackSize() > 0
        && fWikiModel.peekNode().equals(BOLD)) {
        fWikiModel.popNode();
    } else {
        fWikiModel.pushNode(new WPTag("b"));
    }
}

/** Handles a '' token: closes an open italic/bold-italic node or opens a new italic node. */
private void handleItalicToken() {
    if (fWikiModel.stackSize() > 0
        && fWikiModel.peekNode().equals(BOLDITALIC)) {
        // italic part of bold-italic ends; bold stays open
        fWikiModel.popNode();
        fWikiModel.pushNode(new WPTag("b"));
    } else if (fWikiModel.stackSize() > 0
        && fWikiModel.peekNode().equals(ITALIC)) {
        fWikiModel.popNode();
    } else {
        fWikiModel.pushNode(new WPTag("i"));
    }
}

/** Pops the given tag if it is currently open on top of the stack, otherwise pushes it. */
private void togglePhysicalTag(TagToken tag) {
    if (fWikiModel.stackSize() > 0
        && fWikiModel.peekNode().equals(tag)) {
        fWikiModel.popNode();
    } else {
        fWikiModel.pushNode(tag);
    }
}
/**
 * Preformatted blocks never produce a table of contents.
 *
 * @return always <code>false</code>
 */
// NOTE(review): setNoToC below carries @Override but this getter does not —
// confirm whether AbstractParser (not visible here) also declares isNoToC()
public boolean isNoToC() {
    return false;
}
/**
 * Intentional no-op: the TOC setting is ignored inside preformatted blocks.
 */
@Override
public void setNoToC(boolean noToC) {
}
/**
 * Call the parser on the subsequent recursion levels, where the subtexts (of
 * templates, table cells, list items or image captions) don't contain a table
 * of contents (TOC). The result is appended to the wiki model's global stack.
 *
 * <b>Note:</b> the wiki model doesn't call the <code>setUp()</code> or
 * <code>tearDown()</code> methods for the subsequent recursive parser steps.
 *
 * @param rawWikitext the raw wiki text to parse
 * @param wikiModel the wiki model receiving the parsed nodes
 */
public static void parseRecursive(String rawWikitext, IWikiModel wikiModel) {
    parseRecursive(rawWikitext, wikiModel, false, true);
}
/**
 * Call the parser on the subsequent recursion levels, where the subtexts (of
 * templates, table cells, list items or image captions) don't contain a table
 * of contents (TOC)
 *
 * <b>Note:</b> the wiki model doesn't call the <code>setUp()</code> or
 * <code>tearDown()</code> methods for the subsequent recursive parser steps.
 *
 * @param rawWikitext the raw wiki text to parse
 * @param wikiModel the wiki model receiving the parsed nodes
 * @param createOnlyLocalStack if <code>true</code> the parsed nodes are only
 *        returned on the local stack and not appended to the global stack
 * @param noTOC if <code>true</code> suppress the table of contents
 * @return the tag stack holding the parsed nodes
 */
public static TagStack parseRecursive(String rawWikitext,
    IWikiModel wikiModel, boolean createOnlyLocalStack, boolean noTOC) {
    AbstractParser parser = new WikipediaPreTagParser(rawWikitext);
    return parser
        .parseRecursiveInternal(wikiModel, createOnlyLocalStack, noTOC);
}
/**
 * Parses this parser's source onto a local tag stack and — unless
 * <code>createOnlyLocalStack</code> is set or parsing is aborted via
 * {@link InvalidPreWikiTag} — appends the result to the wiki model's global
 * stack. The model's recursion level guards against infinite recursion.
 *
 * @param wikiModel the wiki model whose stack is temporarily swapped out
 * @param createOnlyLocalStack if <code>true</code>, do not append the local
 *        stack to the global stack
 * @param noTOC ignored by this parser (no TOC inside preformatted blocks)
 * @return the local stack holding the parsed nodes (possibly an error node)
 */
@Override
public TagStack parseRecursiveInternal(IWikiModel wikiModel,
    boolean createOnlyLocalStack, boolean noTOC) {
    // local stack for this wiki snippet
    TagStack localStack = new TagStack();
    // global wiki model stack
    TagStack globalWikiModelStack = wikiModel.swapStack(localStack);
    try {
        // fix for infinite recursion
        int level = wikiModel.incrementRecursionLevel();
        if (level > Configuration.PARSER_RECURSION_LIMIT) {
            TagNode error = new TagNode("span");
            error.addAttribute("class", "error", true);
            error.addChild(new ContentToken(
                "Error - recursion limit exceeded parsing wiki tags."));
            localStack.append(error);
            return localStack;
        }
        setModel(wikiModel);
        runParser();
        return localStack;
    } catch (InvalidPreWikiTag ipwt) {
        // propagate, but make sure the partial local stack is NOT appended
        // to the global stack in the finally block below
        createOnlyLocalStack = true;
        throw ipwt;
    } catch (Exception e) {
        e.printStackTrace();
        TagNode error = new TagNode("span");
        error.addAttribute("class", "error", true);
        error.addChild(new ContentToken(e.getClass().getSimpleName()));
        localStack.append(error);
    } catch (Error e) {
        e.printStackTrace();
        TagNode error = new TagNode("span");
        error.addAttribute("class", "error", true);
        error.addChild(new ContentToken(e.getClass().getSimpleName()));
        localStack.append(error);
    } finally {
        wikiModel.decrementRecursionLevel();
        if (!createOnlyLocalStack) {
            // append the recursively parsed local stack to the global wiki
            // model stack
            globalWikiModelStack.append(localStack);
        }
        // always restore the original global stack
        wikiModel.swapStack(globalWikiModelStack);
    }
    return localStack;
}
}