Xomw_prepro_wkr.java example

Explorer
xowa-master
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com

XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.

You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.

The terms of each license can be found in the source code repository:

GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*; import gplx.xowa.mediawiki.includes.parsers.*;
import gplx.core.btries.*;
/**
 * Preprocessor worker ported from MediaWiki's PHP preprocessor (see the PORTED / RELIC notes throughout).
 * Preprocess_to_xml turns wikitext bytes into an XML-like byte stream wrapped in "<root>" ... "</root>".
 *
 * Init_by_wiki fills the element tries, so it is expected to run before Preprocess_to_xml.
 * Instances reuse their buffers, list, hash and stack between calls, hence the THREAD.UNSAFE note below.
 */
public class Xomw_prepro_wkr {	// THREAD.UNSAFE: caching for repeated calls
	// scratch buffer for building "<close>", "<h>" and template / argument elements before they are appended to an accumulator
	private final    Bry_bfr tmp_bfr = Bry_bfr_.New();
	// reused list of int[] {bgn, end} pairs describing a run of adjacent comments
	private final    List_adp comments_list = List_adp_.New();
	// tries mapping the text after "<" to a Xomw_prepro_elem; "__y" is used when for_inclusion is true, "__n" when false
	private final    Btrie_slim_mgr elements_trie__y = Btrie_slim_mgr.ci_a7(), elements_trie__n = Btrie_slim_mgr.ci_a7();
	// tags that may run to the end of the text when they have no closing tag
	private final    Hash_adp_bry xmlish_allow_missing_end_tag = Hash_adp_bry.cs().Add_many_str("includeonly", "noinclude", "onlyinclude");
	// names of tags already known to have no closing tag to the right of the current position; cleared per call
	private final    Hash_adp_bry no_more_closing_tag = Hash_adp_bry.cs();
	// stack of open pieces (headings, braces, brackets); also supplies the current accumulator
	private final    Xomw_prepro_stack stack = new Xomw_prepro_stack();
	// reused match-result holder passed to Btrie_slim_mgr.Match_at
	private final    Btrie_rv trv = new Btrie_rv();
	// current accumulator; reassigned from stack.Get_accum() whenever the stack changes
	private Bry_bfr accum = Bry_bfr_.New();

	/**
	 * Rebuilds both element tries from the wiki's list of xml-like extension tags.
	 *
	 * @param xmlish_elems_ary tag names to recognize (PORTED from parser->getStripList())
	 */
	public void Init_by_wiki(String... xmlish_elems_ary) {
		final String[] strip_list = xmlish_elems_ary;

		// trie selected by Preprocess_to_xml when for_inclusion is true: strip list + "noinclude" + ignored_tags_y
		this.Elements_trie__init_by_wiki(this.elements_trie__y, ignored_tags_y, strip_list, "noinclude");

		// trie selected when for_inclusion is false: strip list + "includeonly" + ignored_tags_n
		this.Elements_trie__init_by_wiki(this.elements_trie__n, ignored_tags_n, strip_list, "includeonly");
	}
	/**
	 * Resets one element trie and registers every hook that may follow a "<".
	 *
	 * Registration order: the comment hook "!--", each strip-list name, the extra xml-like element, then each ignored tag.
	 *
	 * @param trie            trie to clear and refill
	 * @param ignored_tags    ordered set of byte[] tag names that are also registered as hooks
	 * @param strip_list_ary  names of the xml-like extension tags
	 * @param xmlish_elem     extra element name ("noinclude" or "includeonly")
	 */
	private void Elements_trie__init_by_wiki(Btrie_slim_mgr trie, Ordered_hash ignored_tags, String[] strip_list_ary, String xmlish_elem) {
		// start empty so that repeated initialization does not keep stale hooks
		trie.Clear();

		// "!--" is the only hook flagged as a comment
		Elements_trie__add(trie, Bool_.Y, "!--", "comment");

		// PORTED: $xmlishElements = parser->getStripList();
		int strip_list_len = strip_list_ary.length;
		for (int idx = 0; idx < strip_list_len; idx++) {
			String elem_name = strip_list_ary[idx];
			Elements_trie__add(trie, Bool_.N, elem_name, elem_name);
		}

		// PORTED: "$xmlishElements[] = 'noinclude';" or "$xmlishElements[] = 'includeonly';"
		Elements_trie__add(trie, Bool_.N, xmlish_elem, xmlish_elem);

		// PORTED: $xmlishRegex = implode( '|', array_merge( $xmlishElements, $ignoredTags ) );
		int tag_count = ignored_tags.Count();
		int tag_idx = 0;
		while (tag_idx < tag_count) {
			String tag_name = String_.new_u8((byte[])ignored_tags.Get_at(tag_idx));
			Elements_trie__add(trie, Bool_.N, tag_name, tag_name);
			tag_idx++;
		}
	}
	/**
	 * Registers one hook in the trie, mapped to a new Xomw_prepro_elem.
	 *
	 * @param trie            trie receiving the hook
	 * @param type_is_comment true to tag the element as Type__comment, false for Type__other
	 * @param hook            text matched immediately after "<"
	 * @param name            element name stored on the Xomw_prepro_elem (as ASCII bytes)
	 */
	private static void Elements_trie__add(Btrie_slim_mgr trie, boolean type_is_comment, String hook, String name) {
		byte[] name_bry = Bry_.new_a7(name);
		Xomw_prepro_elem elem;
		if (type_is_comment) {
			elem = new Xomw_prepro_elem(Xomw_prepro_elem.Type__comment, name_bry);
		}
		else {
			elem = new Xomw_prepro_elem(Xomw_prepro_elem.Type__other, name_bry);
		}
		trie.Add_obj(hook, elem);
	}
	/**
	 * Converts wikitext into the preprocessor's XML-like serialization, wrapped in "<root>" ... "</root>".
	 *
	 * The text is scanned once from left to right. Literal runs are html-escaped into the current accumulator.
	 * Each significant character sets "found" to one of the Found__* codes, which selects a handler:
	 * angle (comments, ignored tags, extension tags), line start (heading open), line end (heading close),
	 * open / close (braces and brackets, tracked on the stack), pipe and equals (template argument separators).
	 * Pieces still open at the end of the text are flushed into the root accumulator via Break_syntax.
	 *
	 * The element tries are filled by Init_by_wiki, so that method is expected to have run first.
	 *
	 * @param src           wikitext bytes to preprocess
	 * @param for_inclusion selects the "__y" tables (ignore includeonly tags, treat noinclude as ignored element,
	 *                      honor onlyinclude) when true, or the "__n" tables when false
	 * @return the XML-like bytes taken from the root accumulator
	 */
	public byte[] Preprocess_to_xml(byte[] src, boolean for_inclusion) {
		// RELIC.PROC_VAR:     forInclusion = $flags & Parser::PTD_FOR_INCLUSION;
		// RELIC.INIT_BY_WIKI: $xmlishElements = parser->getStripList();
		// RELIC.CLASS_VAR:    $xmlishAllowMissingEndTag = [ 'includeonly', 'noinclude', 'onlyinclude' ];
		boolean enable_only_include = false;

		// PORTED: rewritten so that all add / del is done in INIT_BY_WIKI
		Ordered_hash ignored_tags;
		Hash_adp ignored_elements;
		Btrie_slim_mgr elements_trie;
		if (for_inclusion) {
			ignored_tags = ignored_tags_y;              // RELIC: $ignoredTags = [ 'includeonly', '/includeonly' ];
			ignored_elements = ignored_elements__y;     // RELIC: $ignoredElements = [ 'noinclude' ];
			// RELIC.INIT_BY_WIKI: $xmlishElements[] = 'noinclude';
			// onlyinclude handling is enabled only when both the opening and the closing tag occur somewhere in src
			if (	Bry_.Has(src, Bry__only_include_bgn)
				&&	Bry_.Has(src, Bry__only_include_end)) {
				enable_only_include = true;
			}
			elements_trie = elements_trie__y;
		}
		else {
			ignored_tags = ignored_tags_n;              // $ignoredTags = [ 'noinclude', '/noinclude', 'onlyinclude', '/onlyinclude' ];
			ignored_elements = ignored_elements__n;     // $ignoredElements = [ 'includeonly' ];
			// RELIC.INIT_BY_WIKI: $xmlishElements[] = 'includeonly';
			elements_trie = elements_trie__n;
		}

		// RELIC.INIT_BY_WIKI: $xmlishRegex = implode( '|', array_merge( $xmlishElements, $ignoredTags ) );

		// RELIC.REGEX
		// Use "A" modifier (anchored) instead of "^", because ^ doesn't work with an offset
		// $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA";

		stack.Clear();

		// RELIC.REGEX:
		// $searchBase = "[{<\n"; # }

		// RELIC.BRY_FIND
		// For fast reverse searches
		// $revText = strrev( $text );
		// $lengthText = strlen( $text );

		// Input pointer, starts out pointing to a pseudo-newline before the start
		int i = 0;

		// Current accumulator
		accum = stack.Get_accum();
		accum.Add_str_a7("<root>");

		// True to find equals signs in arguments
		boolean find_equals = false;

		// True to take notice of pipe characters
		boolean find_pipe = false;
		int heading_index = 1;

		// True if $i is inside a possible heading
		boolean in_heading = false;

		// True if there are no more greater-than (>) signs right of $i
		boolean no_more_gt = false;

		// Map of tag name => true if there are no more closing tags of given type right of $i
		no_more_closing_tag.Clear();

		// True to ignore all input up to the next <onlyinclude>
		boolean find_only_include = enable_only_include;

		// Do a line-start run without outputting an LF character
		boolean fake_line_start = true;

		// XOWA: init
		int src_len = src.length;
		int found = -1;
		byte[] cur_char = Bry_.Empty;
		byte[] cur_closing = Bry_.Empty;
		byte[] inner = null;
		Xomw_prepro_rule rule = null;

		while (true) {
			if (find_only_include) {
				// Ignore all input up to the next <onlyinclude>
				int start_pos = Bry_find_.Find_fwd(src, Bry__only_include_bgn, i, src_len);
				if (start_pos == Bry_find_.Not_found) {
					// Ignored section runs to the end
					accum.Add_str_a7("<ignore>").Add_bry_escape_html(src, i, src_len).Add_str_a7("</ignore>");
					break;
				}
				int tag_end_pos = start_pos + Bry__only_include_bgn.length; // past-the-end
				accum.Add_str_a7("<ignore>").Add_bry_escape_html(src, i, tag_end_pos).Add_str_a7("</ignore>");
				i = tag_end_pos;
				find_only_include = false;
			}

			if (fake_line_start) {
				found = Found__line_bgn;
				cur_char = Bry_.Empty;
			}
			else {
				// Find next opening brace, closing brace or pipe
				// RELIC.REGEX: $search = $searchBase;
				if (stack.top == null) {
					cur_closing = Bry_.Empty;
				}
				else {
					cur_closing = stack.top.close;
					// RELIC.REGEX: $search .= $currentClosing;
				}
				if (find_pipe) {
					// RELIC.REGEX: $search .= '|';
				}
				if (find_equals) {
					// First equals will be for the template
					// RELIC.REGEX: $search .= '=';
				}

				// Output literal section, advance input counter
				// PORTED: "$literalLength = strcspn(src, $search, i)"; NOTE: no trie b/c of frequent changes to $search
				int literal_len = 0;
				boolean loop_stop = false;
				// loop chars until search_char is found
				for (int j = i; j < src_len; j++) {
					byte b = src[j];
					switch (b) {                // handle '$searchBase = "[{<\n";'
						case Byte_ascii.Brack_bgn:
						case Byte_ascii.Curly_bgn:
						case Byte_ascii.Angle_bgn:
						case Byte_ascii.Nl:
							loop_stop = true;
							break;
						case Byte_ascii.Pipe:   // handle "find_pipe"
							if (find_pipe)   loop_stop = true;
							break;
						case Byte_ascii.Eq:     // handle "find_equals"
							if (find_equals) loop_stop = true;
							break;
						default:                // handle "cur_closing"; specified by piece.close and rule.close, so "\n", "}", "]" and "}-"
							// reference comparison: cur_closing is assigned Bry_.Empty itself when nothing is open
							if (cur_closing != Bry_.Empty) {
								byte cur_closing_0 = cur_closing[0];
								if (b == cur_closing_0) {
									if (cur_closing.length == 1) {	// handle "\n", "}", "]"
										loop_stop = true;
									}
									else {// handle "}-"
										int nxt_idx = j + 1;
										if (nxt_idx < src_len && src[nxt_idx] == Byte_ascii.Dash)
											loop_stop = true;
									}
								}
							}
							break;
					}
					if (loop_stop)
						break;
					else
						literal_len++;
				}
				if (literal_len > 0) {
					accum.Add_bry_escape_html(src, i, i + literal_len);
					i += literal_len;
				}
				if (i >= src_len) {
					if (Bry_.Eq(cur_closing, Byte_ascii.Nl_bry)) {
						// Do a past-the-end run to finish off the heading
						cur_char = Bry_.Empty;
						found = Found__line_end;
					}
					else {
						// All done
						break;
					}
				}
				else {
					// PORTED: "if ( $curChar == '|' ) {", etc..
					Xomw_prepro_curchar_itm cur_char_itm = (Xomw_prepro_curchar_itm)cur_char_trie.Match_at(trv, src, i, src_len);
					if (cur_char_itm != null) {
						cur_char = cur_char_itm.bry;
						switch (cur_char_itm.type) {
							case Byte_ascii.Pipe:         found = Found__pipe; break;
							case Byte_ascii.Eq:           found = Found__equals; break;
							case Byte_ascii.Angle_bgn:    found = Found__angle; break;
							case Byte_ascii.Nl:           found = in_heading ? Found__line_end : Found__line_bgn; break;

							// PORTED: "elseif ( $curChar == $currentClosing )"
							case Byte_ascii.Curly_end:    found = Found__close; break;
							case Byte_ascii.Brack_end:    found = Found__close; break;
							case Byte_ascii.At:           found = Found__close; break;	// NOTE: At is type for "}-"

							// PORTED: "elseif ( isset( $this->rules[$curChar] ) )"
							case Byte_ascii.Curly_bgn:   {found = Found__open; rule = rule_curly; break;}
							case Byte_ascii.Brack_bgn:   {found = Found__open; rule = rule_brack; break;}
							case Byte_ascii.Dash:        {found = Found__open; rule = rule_langv; break;}
						}
					}
					else {
						i++;
						continue;
					}
				}
			}

			if (found == Found__angle) {
				// Handle </onlyinclude>
				if (	enable_only_include
					&&	Bry_.Eq(src, i, i + Len__only_include_end, Bry__only_include_end)) {
					// i is deliberately not advanced: the next pass emits "</onlyinclude>..." inside <ignore>
					find_only_include = true;
					continue;
				}

				// Determine element name
				// PORTED: $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA"; EX: "(pre|ref)(?:\s|\/>|>)|(!--)
				Xomw_prepro_elem element = (Xomw_prepro_elem)elements_trie.Match_at(trv, src, i + 1, src_len);
				if (element == null) {
					// Element name missing or not listed
					accum.Add(Bry__escaped_lt);
					i++;
					continue;
				}

				// Handle comments
				if (element.type == Xomw_prepro_elem.Type__comment) {
					// To avoid leaving blank lines, when a sequence of
					// space-separated comments is both preceded and followed by
					// a newline (ignoring spaces), then
					// trim leading and trailing spaces and the trailing newline.

					// Find the end
					int end_pos = Bry_find_.Find_fwd(src, Bry__comment_end, i + 4, src_len);
					if (end_pos == Bry_find_.Not_found) {
						// Unclosed comment in input, runs to end
						accum.Add_str_a7("<comment>").Add_bry_escape_html(src, i, src_len).Add_str_a7("</comment>");
						i = src_len;
					}
					else {
						// Search backwards for leading whitespace
						int ws_bgn = i > 0 ? i - XophpString.strspn_bwd__space_or_tab(src, i, -1) : 0;

						// Search forwards for trailing whitespace
						// $wsEnd will be the position of the last space (or the '>' if there's none)
						int ws_end = end_pos + 2 + XophpString.strspn_fwd__space_or_tab(src, end_pos + 3, -1, src_len);

						// Keep looking forward as long as we're finding more
						// comments.
						comments_list.Clear();
						comments_list.Add(new int[] {ws_bgn, ws_end});
						while (ws_end + 5 < src_len && Bry_.Eq(src, ws_end + 1, ws_end + 5, Bry__comment_bgn)) {
							int cur_char_pos = Bry_find_.Find_fwd(src, Bry__comment_end, ws_end + 4);
							if (cur_char_pos == Bry_find_.Not_found) {
								break;
							}
							cur_char_pos = cur_char_pos + 2 + XophpString.strspn_fwd__space_or_tab(src, cur_char_pos + 3, -1, src_len);
							comments_list.Add(new int[] {ws_end + 1, cur_char_pos});
							ws_end = cur_char_pos;
						}

						// Eat the line if possible
						// TODO: This could theoretically be done if $wsStart == 0, i.e. for comments at
						// the overall start. That's not how Sanitizer::removeHTMLcomments() did it, but
						// it's a possible beneficial b/c break.
						int bgn_pos = -1;
						if (	ws_bgn > 0
							&&	Bry_.Eq(src, ws_bgn - 1, ws_bgn    , Byte_ascii.Nl_bry)
							&&	Bry_.Eq(src, ws_end + 1, ws_end + 2, Byte_ascii.Nl_bry)
						) {
							// Remove leading whitespace from the end of the accumulator
							// Sanity check first though
							int ws_len = i - ws_bgn;
							int accum_len = accum.Len();
							if (	ws_len > 0
								&&	XophpString.strspn_fwd__space_or_tab(accum.Bfr(), accum_len - ws_len, -1, accum_len) == ws_len) {
								accum.Del_by(ws_len);
							}

							// Dump all but the last comment to the accumulator
							// bgn_pos / end_pos are left holding the last comment's range for the code after this branch
							int comments_list_len = comments_list.Len();
							for (int j = 0; j < comments_list_len; j++) {
								int[] com = (int[])comments_list.Get_at(j);
								bgn_pos = com[0];
								end_pos = com[1] + 1;
								if (j == comments_list_len - 1) {
									break;
								}
								inner = Bry_.Mid(src, bgn_pos, end_pos);
								accum.Add_str_a7("<comment>").Add_bry_escape_html(inner).Add_str_a7("</comment>");
							}

							// Do a line-start run next time to look for headings after the comment
							fake_line_start = true;
						}
						else {
							// No line to eat, just take the comment itself
							bgn_pos = i;
							end_pos += 2;
						}

						if (stack.top != null) {
							Xomw_prepro_part part = stack.top.Get_current_part();
							if (!(part.comment_end != -1 && part.comment_end == ws_bgn - 1)) {
								part.visual_end = ws_bgn;
							}
							// Else comments abutting, no change in visual end
							part.comment_end = end_pos;
						}
						i = end_pos + 1;
						inner = Bry_.Mid(src, bgn_pos, end_pos + 1);
						accum.Add_str_a7("<comment>").Add_bry_escape_html(inner).Add_str_a7("</comment>");
					}
					continue;
				}

				byte[] name = element.name;
				// RELIC.BTRIE_CI: $lowerName = strtolower( $name );
				int atr_bgn = i + name.length + 1;

				// Find end of tag
				int tag_end_pos = no_more_gt ? Bry_find_.Not_found : Bry_find_.Find_fwd(src, Byte_ascii.Angle_end, atr_bgn);
				if (tag_end_pos == Bry_find_.Not_found) {
					// Infinite backtrack
					// Disable tag search to prevent worst-case O(N^2) performance
					no_more_gt = true;
					accum.Add(Bry__escaped_lt);
					i++;
					continue;
				}

				// Handle ignored tags
				if (ignored_tags.Has(name)) {
					accum.Add_str_a7("<ignore>").Add_bry_escape_html(src, i, tag_end_pos + 1).Add_str_a7("</ignore>");
					i = tag_end_pos + 1;
					continue;
				}

				int tag_bgn_pos = i;
				int atr_end = -1;
				byte[] close = null;
				if (src[tag_end_pos - 1] == Byte_ascii.Slash) {
					// self-closing tag: no inner text and no <close> element
					atr_end = tag_end_pos - 1;
					inner = null;
					i = tag_end_pos + 1;
					close = Bry_.Empty;
				}
				else {
					atr_end = tag_end_pos;
					// Find closing tag
					// PORTED: `preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",`
					boolean elem_end_found = false;
					int elem_end_lhs = -1, elem_end_rhs = -1;
					int elem_end_cur = tag_end_pos + 1;
					// Consult the cache BEFORE scanning (PHP short-circuits "!isset( $noMoreClosingTag[$name] ) && preg_match").
					// A name is only cached after a scan from further left found no closing tag, so none can exist from here either.
					// Scanning anyway would reintroduce the O(N^2) behavior that the cache exists to prevent.
					boolean search_closing_tag = !no_more_closing_tag.Has(name);
					while (search_closing_tag) {	// flag never changes inside the loop; the loop exits through "break"
						// search for "</"
						elem_end_lhs = Bry_find_.Find_fwd(src, Bry__end_lhs, elem_end_cur, src_len);
						if (elem_end_lhs == Bry_find_.Not_found) {
							break;
						}

						// verify $name
						elem_end_cur = elem_end_lhs + 2;	// 2="</"
						int elem_end_tmp = elem_end_cur + name.length;
						if (!Bry_.Eq_ci_a7(name, src, elem_end_cur, elem_end_tmp)) {
							continue;
						}

						// verify "\s*>"
						// NOTE(review): only Byte_ascii.Space is skipped here, whereas the PHP "\s" also covers tabs and newlines -- confirm whether that is intended
						elem_end_cur = elem_end_tmp;
						elem_end_cur = Bry_find_.Find_fwd_while(src, elem_end_cur, src_len, Byte_ascii.Space);
						if (elem_end_cur == src_len) {	// just "\s", but no ">"
							break;
						}
						if (src[elem_end_cur] == Byte_ascii.Gt) {
							elem_end_rhs = elem_end_cur + 1;
							elem_end_found = true;
							break;
						}
					}
					if (elem_end_found) {	// implies that name was not in no_more_closing_tag; otherwise the scan above is skipped
						inner = Bry_.Mid(src, tag_end_pos + 1, elem_end_lhs);
						i = elem_end_rhs;
						tmp_bfr.Add_str_a7("<close>").Add_bry_escape_html(src, elem_end_lhs, elem_end_rhs).Add_str_a7("</close>");
						close = tmp_bfr.To_bry_and_clear();
					}
					else {
						// No end tag
						if (xmlish_allow_missing_end_tag.Has(name)) {
							// Let it run out to the end of the src.
							inner = Bry_.Mid(src, tag_end_pos + 1);
							i = src_len;
							close = Bry_.Empty;
						}
						else {
							// Don't match the tag, treat opening tag as literal and resume parsing.
							i = tag_end_pos + 1;
							accum.Add_bry_escape_html(src, tag_bgn_pos, tag_end_pos + 1);
							// Cache results, otherwise we have O(N^2) performance for input like <foo><foo><foo>...
							no_more_closing_tag.Add_if_dupe_use_nth(name, name);
							continue;
						}
					}
				}

				// <includeonly> and <noinclude> just become <ignore> tags
				if (ignored_elements.Has(name)) {
					accum.Add_str_a7("<ignore>").Add_bry_escape_html(src, tag_bgn_pos, i).Add_str_a7("</ignore>");
					continue;
				}

				accum.Add_str_a7("<ext>");
				// PORTED:
				// if ( $attrEnd <= $attrStart ) {
				//	 $attr = '';
				// } else {
				//   $attr = substr( $text, $attrStart, $attrEnd - $attrStart );
				// }
				accum.Add_str_a7("<name>").Add(name).Add_str_a7("</name>");
				// Note that the attr element contains the whitespace between name and attribute,
				// this is necessary for precise reconstruction during pre-save transform.
				accum.Add_str_a7("<attr>");
				if (atr_end > atr_bgn)
					accum.Add_bry_escape_html(src, atr_bgn, atr_end);
				accum.Add_str_a7("</attr>");
				if (inner != null) {
					accum.Add_str_a7("<inner>").Add_bry_escape_html(inner).Add_str_a7("</inner>");
				}
				accum.Add(close).Add_str_a7("</ext>");
			}
			else if (found == Found__line_bgn) {
				// Is this the start of a heading?
				// Line break belongs before the heading element in any case
				if (fake_line_start) {
					fake_line_start = false;
				} else {
					accum.Add(cur_char);
					i++;
				}

				// count leading "=" characters, capped at 6
				int count = XophpString.strspn_fwd__byte(src, Byte_ascii.Eq, i, 6, src_len);
				if (count == 1 && find_equals) {	// EX: "{{a|\n=b=\n"
					// DWIM: This looks kind of like a name/value separator.
					// Let's let the equals handler have it and break the
					// potential heading. This is heuristic, but AFAICT the
					// methods for completely correct disambiguation are very
					// complex.
				}
				else if (count > 0) {
					Xomw_prepro_piece piece = new Xomw_prepro_piece(Byte_ascii.Nl_bry, Byte_ascii.Nl_bry, count, i, false);
					piece.Add_part(Bry_.Repeat(Byte_ascii.Eq, count));
					stack.Push(piece);
					accum = stack.Get_accum();
					Xomw_prepro_flags flags = stack.Get_flags();
					find_pipe = flags.Find_pipe;
					find_equals = flags.Find_eq;
					in_heading = flags.In_heading;
					i += count;
				}
			}
			else if (found == Found__line_end) {
				Xomw_prepro_piece piece = stack.top;
				// A heading must be open, otherwise \n wouldn't have been in the search list
				if (!Bry_.Eq(piece.open, Byte_ascii.Nl_bry)) throw Err_.new_wo_type("assertion:piece must start with \\n");
				Xomw_prepro_part part = piece.Get_current_part();

				// Search back through the input to see if it has a proper close.
				// Do this using the reversed String since the other solutions
				// (end anchor, etc.) are inefficient.
				// PORTED: $wsLength = strspn( $revText, " \t", $lengthText - $i );
				// The backward helpers are called with the forward index everywhere else in this method
				// (comment handling above, search_bgn below), so pass "i" here as well.
				// Passing "src_len - i" (the offset into PHP's reversed string) would measure whitespace at an unrelated position.
				int ws_len = XophpString.strspn_bwd__space_or_tab(src, i, -1);
				int search_bgn = i - ws_len;

				if (part.comment_end != -1 && search_bgn -1 == part.comment_end) {
					// Comment found at line end
					// Search for equals signs before the comment
					search_bgn = part.visual_end;
					search_bgn = Bry_find_.Find_bwd__while_space_or_tab(src, search_bgn, 0);
					search_bgn -= XophpString.strspn_bwd__space_or_tab(src, search_bgn, -1);
				}
				int count = piece.count;
				int eq_len = XophpString.strspn_bwd__byte(src, Byte_ascii.Eq, search_bgn, -1);

				byte[] element = Bry_.Empty;
				if (eq_len > 0) {
					if (search_bgn - eq_len == piece.start_pos) {
						// This is just a single String of equals signs on its own line
						// Replicate the doHeadings behavior /={count}(.+)={count}/
						// First find out how many equals signs there really are (don't stop at 6)
						count = eq_len;
						if (count < 3) {
							count = 0;
						}
						else {
							count = (count - 1) / 2;
							if (count > 6) count = 6;
						}
					}
					else {
						if (eq_len < count)	count = eq_len;	// PORTED: $count = min( $equalsLength, $count );
					}
					if (count > 0) {
						// Normal match, output <h>
						element = tmp_bfr.Add_str_a7("<h level=\"").Add_int_variable(count).Add_str_a7("\" i=\"").Add_int_variable(heading_index).Add_str_a7("\">").Add_bfr_and_preserve(accum).Add_str_a7("</h>").To_bry_and_clear();
						heading_index++;
					} else {
						// Single equals sign on its own line, count=0
						element = accum.To_bry();
					}
				}
				else {
					// No match, no <h>, just pass down the inner src
					element = accum.To_bry();
				}

				// Unwind the stack
				stack.Pop();
				accum = stack.Get_accum();

				Xomw_prepro_flags flags = stack.Get_flags();
				find_pipe = flags.Find_pipe;
				find_equals = flags.Find_eq;
				in_heading = flags.In_heading;

				// Append the result to the enclosing accumulator
				accum.Add(element);
				// Note that we do NOT increment the input pointer.
				// This is because the closing linebreak could be the opening linebreak of
				// another heading. Infinite loops are avoided because the next iteration MUST
				// hit the heading open case above, which unconditionally increments the
				// input pointer.
			}
			else if (found == Found__open) {
				// count opening brace characters
				int count = XophpString.strspn_fwd__byte(src, cur_char[0], i, -1, src_len);	// NOTE: don't know how MediaWiki will handle "-{"

				// we need to add to stack only if opening brace count is enough for one of the rules
				if (count >= rule.min) {
					// Add it to the stack
					Xomw_prepro_piece piece = new Xomw_prepro_piece(cur_char, rule.end, count, -1, i > 0 && src[i - 1] == Byte_ascii.Nl);

					stack.Push(piece);
					accum = stack.Get_accum();
					Xomw_prepro_flags flags = stack.Get_flags();
					find_pipe = flags.Find_pipe;
					find_equals = flags.Find_eq;
					in_heading = flags.In_heading;
				}
				else {
					// Add literal brace(s)
					for (int j = 0; j < count; j++)
						accum.Add_bry_escape_html(cur_char);
				}
				i += count;
			}
			else if (found == Found__close) {
				Xomw_prepro_piece piece = stack.top;
				// lets check if there are enough characters for closing brace
				int max_count = piece.count;
				int count = XophpString.strspn_fwd__byte(src, cur_char[0], i, max_count, src_len);

				// check for maximum matching characters (if there are 5 closing characters, we will probably need only 3 - depending on the rules)
				rule = Get_rule(piece.open);
				int matching_count = -1;
				if (count > rule.max) {
					// The specified maximum exists in the callback array, unless the caller
					// has made an error
					matching_count = rule.max;
				}
				else {
					// Count is less than the maximum
					// Skip any gaps in the callback array to find the true largest match
					// Need to use array_key_exists not isset because the callback can be null
					matching_count = count;
					while (matching_count > 0 && !rule.Names_exist(matching_count)) {
						matching_count--;
					}
				}

				if (matching_count <= 0) {
					// No matching element found in callback array
					// Output a literal closing brace and continue
					for (int j = 0; j < count; j++)
						accum.Add_bry_escape_html(cur_char);
					i += count;
					continue;
				}
				int name_type = rule.names[matching_count];
				byte[] element = null;
				if (name_type == Xomw_prepro_rule.Name__null) {
					// No element, just literal text
					tmp_bfr.Add(piece.Break_syntax(tmp_bfr, matching_count));
					element = tmp_bfr.Add(Bry_.Repeat_bry(rule.end, matching_count)).To_bry_and_clear();
				}
				else {
					// Create XML element
					// Note: $parts is already XML, does not need to be encoded further
					List_adp parts = piece.parts;
					byte[] title = ((Xomw_prepro_part)parts.Get_at(0)).bfr.To_bry_and_clear();
					parts.Del_at(0);

					// The invocation is at the start of the line if lineStart is set in
					// the stack, and all opening brackets are used up.
					byte[] attr = null;
					if (max_count == matching_count && piece.line_start) {	// RELIC:!empty( $piece->lineStart )
						attr = Bry_.new_a7(" lineStart=\"1\"");
					}
					else {
						attr = Bry_.Empty;
					}

					byte[] name_bry = Xomw_prepro_rule.Name(name_type);
					tmp_bfr.Add_str_a7("<").Add(name_bry).Add(attr).Add_str_a7(">");
					tmp_bfr.Add_str_a7("<title>").Add(title).Add_str_a7("</title>");

					// unnamed arguments are numbered from 1; named arguments ("a=b") do not consume an index
					int arg_idx = 1;
					int parts_len = parts.Len();
					for (int j = 0; j < parts_len; j++) {
						Xomw_prepro_part part = (Xomw_prepro_part)parts.Get_at(j);
						if (part.Eqpos != -1) {
							// split the part's buffer around the recorded "=" position
							Bry_bfr part_bfr = part.bfr;
							byte[] part_bfr_bry = part_bfr.Bfr();
							tmp_bfr.Add_str_a7("<part><name>").Add_mid(part_bfr_bry, 0, part.Eqpos);
							tmp_bfr.Add_str_a7("</name>=<value>").Add_mid(part_bfr_bry, part.Eqpos + 1, part_bfr.Len());
							tmp_bfr.Add_str_a7("</value></part>");
						}
						else {
							tmp_bfr.Add_str_a7("<part><name index=\"").Add_int_variable(arg_idx).Add_str_a7("\" /><value>").Add(part.bfr.To_bry()).Add_str_a7("</value></part>");
							arg_idx++;
						}
					}
					element = tmp_bfr.Add_str_a7("</").Add(name_bry).Add_str_a7(">").To_bry_and_clear();
				}

				// Advance input pointer
				i += matching_count;

				// Unwind the stack
				stack.Pop();
				accum = stack.Get_accum();

				// Re-add the old stack element if it still has unmatched opening characters remaining
				if (matching_count < piece.count) {
					piece.Parts__renew(); // PORTED: piece.parts = [ new PPDPart ];
					piece.count -= matching_count;

					// do we still qualify for any callback with remaining count?
					int min = Get_rule(piece.open).min;
					if (piece.count >= min) {
						stack.Push(piece);
						accum = stack.Get_accum();
					}
					else {
						accum.Add(Bry_.Repeat_bry(piece.open, piece.count));
					}
				}

				Xomw_prepro_flags flags = stack.Get_flags();
				find_pipe = flags.Find_pipe;
				find_equals = flags.Find_eq;
				in_heading = flags.In_heading;

				// Add XML element to the enclosing accumulator
				accum.Add(element);
			}
			else if (found == Found__pipe) {
				find_equals = true; // shortcut for getFlags()
				stack.Add_part(Bry_.Empty);
				accum = stack.Get_accum();
				i++;
			}
			else if (found == Found__equals) {
				find_equals = false; // shortcut for getFlags()
				// remember where "=" sits in the part's buffer so the close handler can split name from value
				stack.Get_current_part().Eqpos = accum.Len();
				accum.Add_byte(Byte_ascii.Eq);
				i++;
			}
		}

		// Output any remaining unclosed brackets
		Bry_bfr root_accum = stack.Get_root_accum();
		int stack_len = stack.stack.Len();
		for (int j = 0; j < stack_len; j++) {
			Xomw_prepro_piece piece = (Xomw_prepro_piece)stack.stack.Get_at(j);
			root_accum.Add(piece.Break_syntax(tmp_bfr, -1));
		}
		root_accum.Add_str_a7("</root>");
		return root_accum.To_bry_and_clear();
	}
	/**
	 * Looks up the bracket rule whose opening sequence equals the given bytes.
	 *
	 * @param bry opening sequence of a piece
	 * @return rule_curly, rule_brack or rule_langv, checked in that order
	 * @throws Err (via Err_.new_unhandled) when no rule has that opening sequence
	 */
	private Xomw_prepro_rule Get_rule(byte[] bry) {
		if (Bry_.Eq(bry, rule_curly.bgn)) {
			return rule_curly;
		}
		if (Bry_.Eq(bry, rule_brack.bgn)) {
			return rule_brack;
		}
		if (Bry_.Eq(bry, rule_langv.bgn)) {
			return rule_langv;
		}
		throw Err_.new_unhandled(bry);
	}
	// Codes stored in the "found" local of Preprocess_to_xml; each one selects a handler branch in its main loop.
	private static final int 
	  Found__line_bgn = 0	// line start (real "\n" or the fake line start): a heading may open here
	, Found__line_end = 1	// "\n" while inside a heading, or past-the-end run that finishes a heading
	, Found__pipe = 2	// "|" while find_pipe is set
	, Found__equals = 3	// "=" while find_equals is set
	, Found__angle = 4	// "<": comment, ignored tag or extension tag
	, Found__close = 5	// "}", "]" or "}-"
	, Found__open = 6	// "{", "[" or "-{"
	;
	// Bracket rules. Preprocess_to_xml reads bgn / end / min / max / names from them, and names is indexed by the matched character count.
	// NOTE(review): constructor arguments are presumably (bgn, end, min, max, names) -- confirm against Xomw_prepro_rule
	private static final    Xomw_prepro_rule 
	  rule_curly = new Xomw_prepro_rule(Bry_.new_a7("{"), Bry_.new_a7("}")  , 2, 3, new int[] {Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__tmpl, Xomw_prepro_rule.Name__targ})
	, rule_brack = new Xomw_prepro_rule(Bry_.new_a7("["), Bry_.new_a7("]")  , 2, 2, new int[] {Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__null})
	, rule_langv = new Xomw_prepro_rule(Bry_.new_a7("-{"), Bry_.new_a7("}-"), 1, 1, new int[] {Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__null})
	;
	// Byte sequences that Preprocess_to_xml searches for in the source or appends to its output.
	private static final    byte[] 
	  Bry__only_include_bgn = Bry_.new_a7("<onlyinclude>")
	, Bry__only_include_end = Bry_.new_a7("</onlyinclude>")
	, Bry__comment_bgn  = Bry_.new_a7("<!--")
	, Bry__comment_end  = Bry_.new_a7("-->")
	// Appended verbatim (no escaping pass) for a "<" that does not start a known element,
	// so it must already be the html entity; a raw "<" would corrupt the generated XML.
	, Bry__escaped_lt   = Bry_.new_a7("&lt;")
	, Bry__end_lhs      = Bry_.new_a7("</")
	;
	// length of "</onlyinclude>"; used to compare the bytes at the current "<" against the closing tag
	private static final    int Len__only_include_end = Bry__only_include_end.length;
	// maps the significant character sequence at the current position to a Xomw_prepro_curchar_itm (see Cur_char_trie__new)
	private static final    Btrie_slim_mgr cur_char_trie = Cur_char_trie__new();
	// tags wrapped in <ignore>: "_y" applies when for_inclusion is true, "_n" when it is false
	private static final    Ordered_hash
	  ignored_tags_y     = Ordered_hash_.New_bry().Add_many_str("includeonly", "/includeonly")
	, ignored_tags_n     = Ordered_hash_.New_bry().Add_many_str("noinclude", "/noinclude", "onlyinclude", "/onlyinclude");
	// elements whose whole span (open tag through close tag) becomes <ignore>: "__y" for inclusion, "__n" otherwise
	private static final    Hash_adp_bry 
	  ignored_elements__y   = Hash_adp_bry.cs().Add_many_str("noinclude")
	, ignored_elements__n = Hash_adp_bry.cs().Add_many_str("includeonly");
	/**
	 * Builds the trie that classifies the character sequence at the current scan position.
	 *
	 * Each hook maps to a Xomw_prepro_curchar_itm holding the hook bytes and a type byte.
	 * The type is the hook's first byte, except for "}-", which is typed as Byte_ascii.At.
	 *
	 * @return the populated trie
	 */
	private static Btrie_slim_mgr Cur_char_trie__new() {
		Btrie_slim_mgr trie = Btrie_slim_mgr.ci_a7();

		// hooks typed by their first byte; note that "-{" is therefore typed as Byte_ascii.Dash
		String[] hooks = {"|", "=", "<", "\n", "{", "[", "-{", "}", "]"};
		for (int idx = 0; idx < hooks.length; idx++) {
			byte[] hook_bry = Bry_.new_a7(hooks[idx]);
			trie.Add_obj(hook_bry, new Xomw_prepro_curchar_itm(hook_bry, hook_bry[0]));
		}

		// "}-" shares its first byte with "}", so it gets a distinct type
		byte[] langv_close = Bry_.new_a7("}-");
		trie.Add_obj(langv_close, new Xomw_prepro_curchar_itm(langv_close, Byte_ascii.At));
		return trie;
	}
}