/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*; import gplx.xowa.mediawiki.includes.parsers.*;
import gplx.core.btries.*;
public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls
private final Bry_bfr tmp_bfr = Bry_bfr_.New();
private final List_adp comments_list = List_adp_.New();
private final Btrie_slim_mgr elements_trie__y = Btrie_slim_mgr.ci_a7(), elements_trie__n = Btrie_slim_mgr.ci_a7();
private final Hash_adp_bry xmlish_allow_missing_end_tag = Hash_adp_bry.cs().Add_many_str("includeonly", "noinclude", "onlyinclude");
private final Hash_adp_bry no_more_closing_tag = Hash_adp_bry.cs();
private final Xomw_prepro_stack stack = new Xomw_prepro_stack();
private final Btrie_rv trv = new Btrie_rv();
private Bry_bfr accum = Bry_bfr_.New();
public void Init_by_wiki(String... xmlish_elems_ary) {
Elements_trie__init_by_wiki(elements_trie__y, ignored_tags_y, xmlish_elems_ary, "noinclude");
Elements_trie__init_by_wiki(elements_trie__n, ignored_tags_n, xmlish_elems_ary, "includeonly");
}
private void Elements_trie__init_by_wiki(Btrie_slim_mgr trie, Ordered_hash ignored_tags, String[] strip_list_ary, String xmlish_elem) {
trie.Clear();
Elements_trie__add(trie, Bool_.Y, "!--", "comment");
// PORTED: $xmlishElements = parser->getStripList();
for (String itm : strip_list_ary) {
Elements_trie__add(trie, Bool_.N, itm, itm);
}
// PORTED: "$xmlishElements[] = 'noinclude';" or "$xmlishElements[] = 'includeonly';"
Elements_trie__add(trie, Bool_.N, xmlish_elem, xmlish_elem);
// PORTED: $xmlishRegex = implode( '|', array_merge( $xmlishElements, $ignoredTags ) );
int ignored_tags_len = ignored_tags.Count();
for (int j = 0; j < ignored_tags_len; j++) {
byte[] bry = (byte[])ignored_tags.Get_at(j);
String str = String_.new_u8(bry);
Elements_trie__add(trie, Bool_.N, str, str);
}
}
private static void Elements_trie__add(Btrie_slim_mgr trie, boolean type_is_comment, String hook, String name) {
trie.Add_obj(hook, new Xomw_prepro_elem(type_is_comment ? Xomw_prepro_elem.Type__comment : Xomw_prepro_elem.Type__other, Bry_.new_a7(name)));
}
public byte[] Preprocess_to_xml(byte[] src, boolean for_inclusion) {
// RELIC.PROC_VAR: forInclusion = $flags & Parser::PTD_FOR_INCLUSION;
// RELIC.INIT_BY_WIKI: $xmlishElements = parser->getStripList();
// RELIC.CLASS_VAR: $xmlishAllowMissingEndTag = [ 'includeonly', 'noinclude', 'onlyinclude' ];
boolean enable_only_include = false;
// PORTED: rewritten so that all add / del is done in INIT_BY_WIKI
Ordered_hash ignored_tags;
Hash_adp ignored_elements;
Btrie_slim_mgr elements_trie;
if (for_inclusion) {
ignored_tags = ignored_tags_y; // RELIC: $ignoredTags = [ 'includeonly', '/includeonly' ];
ignored_elements = ignored_elements__y; // RELIC: $ignoredElements = [ 'noinclude' ];
// RELIC.INIT_BY_WIKI: $xmlishElements[] = 'noinclude';
if ( Bry_.Has(src, Bry__only_include_bgn)
&& Bry_.Has(src, Bry__only_include_end)) {
enable_only_include = true;
}
elements_trie = elements_trie__y;
}
else {
ignored_tags = ignored_tags_n; // $ignoredTags = [ 'noinclude', '/noinclude', 'onlyinclude', '/onlyinclude' ];
ignored_elements = ignored_elements__n; // $ignoredElements = [ 'includeonly' ];
// RELIC.INIT_BY_WIKI: $xmlishElements[] = 'includeonly';
elements_trie = elements_trie__n;
}
// RELIC.INIT_BY_WIKI: $xmlishRegex = implode( '|', array_merge( $xmlishElements, $ignoredTags ) );
// RELIC.REGEX
// Use "A" modifier (anchored) instead of "^", because ^ doesn't work with an offset
// $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA";
stack.Clear();
// RELIC.REGEX:
// $searchBase = "[{<\n"; # }
// RELIC.BRY_FIND
// For fast reverse searches
// $revText = strrev( $text );
// $lengthText = strlen( $text );
// Input pointer, starts out pointing to a pseudo-newline before the start
int i = 0;
// Current accumulator
accum = stack.Get_accum();
accum.Add_str_a7("<root>");
// True to find equals signs in arguments
boolean find_equals = false;
// True to take notice of pipe characters
boolean find_pipe = false;
int heading_index = 1;
// True if $i is inside a possible heading
boolean in_heading = false;
// True if there are no more greater-than (>) signs right of $i
boolean no_more_gt = false;
// Map of tag name => true if there are no more closing tags of given type right of $i
no_more_closing_tag.Clear();
// True to ignore all input up to the next <onlyinclude>
boolean find_only_include = enable_only_include;
// Do a line-start run without outputting an LF character
boolean fake_line_start = true;
// XOWA: init
int src_len = src.length;
int found = -1;
byte[] cur_char = Bry_.Empty;
byte[] cur_closing = Bry_.Empty;
byte[] inner = null;
Xomw_prepro_rule rule = null;
while (true) {
if (find_only_include) {
// Ignore all input up to the next <onlyinclude>
int start_pos = Bry_find_.Find_fwd(src, Bry__only_include_bgn, i, src_len);
if (start_pos == Bry_find_.Not_found) {
// Ignored section runs to the end
accum.Add_str_a7("<ignore>").Add_bry_escape_html(src, i, src_len).Add_str_a7("</ignore>");
break;
}
int tag_end_pos = start_pos + Bry__only_include_bgn.length; // past-the-end
accum.Add_str_a7("<ignore>").Add_bry_escape_html(src, i, tag_end_pos).Add_str_a7("</ignore>");
i = tag_end_pos;
find_only_include = false;
}
if (fake_line_start) {
found = Found__line_bgn;
cur_char = Bry_.Empty;
}
else {
// Find next opening brace, closing brace or pipe
// RELIC.REGEX: $search = $searchBase;
if (stack.top == null) {
cur_closing = Bry_.Empty;
}
else {
cur_closing = stack.top.close;
// RELIC.REGEX: $search .= $currentClosing;
}
if (find_pipe) {
// RELIC.REGEX: $search .= '|';
}
if (find_equals) {
// First equals will be for the template
// RELIC.REGEX: $search .= '=';
}
// Output literal section, advance input counter
// PORTED: "$literalLength = strcspn(src, $search, i)"; NOTE: no trie b/c of frequent changes to $search
int literal_len = 0;
boolean loop_stop = false;
// loop chars until search_char is found
for (int j = i; j < src_len; j++) {
byte b = src[j];
switch (b) { // handle '$searchBase = "[{<\n";'
case Byte_ascii.Brack_bgn:
case Byte_ascii.Curly_bgn:
case Byte_ascii.Angle_bgn:
case Byte_ascii.Nl:
loop_stop = true;
break;
case Byte_ascii.Pipe: // handle "find_pipe"
if (find_pipe) loop_stop = true;
break;
case Byte_ascii.Eq: // handle "find_equals"
if (find_equals) loop_stop = true;
break;
default: // handle "cur_closing"; specified by piece.close and rule.close, so "\n", "}", "]" and "}-"
if (cur_closing != Bry_.Empty) {
byte cur_closing_0 = cur_closing[0];
if (b == cur_closing_0) {
if (cur_closing.length == 1) { // handle "\n", "}", "]"
loop_stop = true;
}
else {// handle "}-"
int nxt_idx = j + 1;
if (nxt_idx < src_len && src[nxt_idx] == Byte_ascii.Dash)
loop_stop = true;
}
}
}
break;
}
if (loop_stop)
break;
else
literal_len++;
}
if (literal_len > 0) {
accum.Add_bry_escape_html(src, i, i + literal_len);
i += literal_len;
}
if (i >= src_len) {
if (Bry_.Eq(cur_closing, Byte_ascii.Nl_bry)) {
// Do a past-the-end run to finish off the heading
cur_char = Bry_.Empty;
found = Found__line_end;
}
else {
// All done
break;
}
}
else {
// PORTED: "if ( $curChar == '|' ) {", etc..
Xomw_prepro_curchar_itm cur_char_itm = (Xomw_prepro_curchar_itm)cur_char_trie.Match_at(trv, src, i, src_len);
if (cur_char_itm != null) {
cur_char = cur_char_itm.bry;
switch (cur_char_itm.type) {
case Byte_ascii.Pipe: found = Found__pipe; break;
case Byte_ascii.Eq: found = Found__equals; break;
case Byte_ascii.Angle_bgn: found = Found__angle; break;
case Byte_ascii.Nl: found = in_heading ? Found__line_end : Found__line_bgn; break;
// PORTED: "elseif ( $curChar == $currentClosing )"
case Byte_ascii.Curly_end: found = Found__close; break;
case Byte_ascii.Brack_end: found = Found__close; break;
case Byte_ascii.At: found = Found__close; break; // NOTE: At is type for "}-"
// PORTED: "elseif ( isset( $this->rules[$curChar] ) )"
case Byte_ascii.Curly_bgn: {found = Found__open; rule = rule_curly; break;}
case Byte_ascii.Brack_bgn: {found = Found__open; rule = rule_brack; break;}
case Byte_ascii.Dash: {found = Found__open; rule = rule_langv; break;}
}
}
else {
i++;
continue;
}
}
}
if (found == Found__angle) {
// Handle </onlyinclude>
if ( enable_only_include
&& Bry_.Eq(src, i, i + Len__only_include_end, Bry__only_include_end)) {
find_only_include = true;
continue;
}
// Determine element name
// PORTED: $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA"; EX: "(pre|ref)(?:\s|\/>|>)|(!--)
Xomw_prepro_elem element = (Xomw_prepro_elem)elements_trie.Match_at(trv, src, i + 1, src_len);
if (element == null) {
// Element name missing or not listed
accum.Add(Bry__escaped_lt);
i++;
continue;
}
// Handle comments
if (element.type == Xomw_prepro_elem.Type__comment) {
// To avoid leaving blank lines, when a sequence of
// space-separated comments is both preceded and followed by
// a newline (ignoring spaces), then
// trim leading and trailing spaces and the trailing newline.
// Find the end
int end_pos = Bry_find_.Find_fwd(src, Bry__comment_end, i + 4, src_len);
if (end_pos == Bry_find_.Not_found) {
// Unclosed comment in input, runs to end
accum.Add_str_a7("<comment>").Add_bry_escape_html(src, i, src_len).Add_str_a7("</comment>");
i = src_len;
}
else {
// Search backwards for leading whitespace
int ws_bgn = i > 0 ? i - XophpString.strspn_bwd__space_or_tab(src, i, -1) : 0;
// Search forwards for trailing whitespace
// $wsEnd will be the position of the last space (or the '>' if there's none)
int ws_end = end_pos + 2 + XophpString.strspn_fwd__space_or_tab(src, end_pos + 3, -1, src_len);
// Keep looking forward as long as we're finding more
// comments.
comments_list.Clear();
comments_list.Add(new int[] {ws_bgn, ws_end});
while (ws_end + 5 < src_len && Bry_.Eq(src, ws_end + 1, ws_end + 5, Bry__comment_bgn)) {
int cur_char_pos = Bry_find_.Find_fwd(src, Bry__comment_end, ws_end + 4);
if (cur_char_pos == Bry_find_.Not_found) {
break;
}
cur_char_pos = cur_char_pos + 2 + XophpString.strspn_fwd__space_or_tab(src, cur_char_pos + 3, -1, src_len);
comments_list.Add(new int[] {ws_end + 1, cur_char_pos});
ws_end = cur_char_pos;
}
// Eat the line if possible
// TODO: This could theoretically be done if $wsStart == 0, i.e. for comments at
// the overall start. That's not how Sanitizer::removeHTMLcomments() did it, but
// it's a possible beneficial b/c break.
int bgn_pos = -1;
if ( ws_bgn > 0
&& Bry_.Eq(src, ws_bgn - 1, ws_bgn , Byte_ascii.Nl_bry)
&& Bry_.Eq(src, ws_end + 1, ws_end + 2, Byte_ascii.Nl_bry)
) {
// Remove leading whitespace from the end of the accumulator
// Sanity check first though
int ws_len = i - ws_bgn;
int accum_len = accum.Len();
if ( ws_len > 0
&& XophpString.strspn_fwd__space_or_tab(accum.Bfr(), accum_len - ws_len, -1, accum_len) == ws_len) {
accum.Del_by(ws_len);
}
// Dump all but the last comment to the accumulator
int comments_list_len = comments_list.Len();
for (int j = 0; j < comments_list_len; j++) {
int[] com = (int[])comments_list.Get_at(j);
bgn_pos = com[0];
end_pos = com[1] + 1;
if (j == comments_list_len - 1) {
break;
}
inner = Bry_.Mid(src, bgn_pos, end_pos);
accum.Add_str_a7("<comment>").Add_bry_escape_html(inner).Add_str_a7("</comment>");
}
// Do a line-start run next time to look for headings after the comment
fake_line_start = true;
}
else {
// No line to eat, just take the comment itself
bgn_pos = i;
end_pos += 2;
}
if (stack.top != null) {
Xomw_prepro_part part = stack.top.Get_current_part();
if (!(part.comment_end != -1 && part.comment_end == ws_bgn - 1)) {
part.visual_end = ws_bgn;
}
// Else comments abutting, no change in visual end
part.comment_end = end_pos;
}
i = end_pos + 1;
inner = Bry_.Mid(src, bgn_pos, end_pos + 1);
accum.Add_str_a7("<comment>").Add_bry_escape_html(inner).Add_str_a7("</comment>");
}
continue;
}
byte[] name = element.name;
// RELIC.BTRIE_CI: $lowerName = strtolower( $name );
int atr_bgn = i + name.length + 1;
// Find end of tag
int tag_end_pos = no_more_gt ? Bry_find_.Not_found : Bry_find_.Find_fwd(src, Byte_ascii.Angle_end, atr_bgn);
if (tag_end_pos == Bry_find_.Not_found) {
// Infinite backtrack
// Disable tag search to prevent worst-case O(N^2) performance
no_more_gt = true;
accum.Add(Bry__escaped_lt);
i++;
continue;
}
// Handle ignored tags
if (ignored_tags.Has(name)) {
accum.Add_str_a7("<ignore>").Add_bry_escape_html(src, i, tag_end_pos + 1).Add_str_a7("</ignore>");
i = tag_end_pos + 1;
continue;
}
int tag_bgn_pos = i;
int atr_end = -1;
byte[] close = null;
if (src[tag_end_pos - 1] == Byte_ascii.Slash) {
atr_end = tag_end_pos - 1;
inner = null;
i = tag_end_pos + 1;
close = Bry_.Empty;
}
else {
atr_end = tag_end_pos;
// Find closing tag
// PORTED: `preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",`
boolean elem_end_found = false;
int elem_end_lhs = -1, elem_end_rhs = -1;
int elem_end_cur = tag_end_pos + 1;
while (true) {
// search for "</"
elem_end_lhs = Bry_find_.Find_fwd(src, Bry__end_lhs, elem_end_cur, src_len);
if (elem_end_lhs == Bry_find_.Not_found) {
break;
}
// verify $name
elem_end_cur = elem_end_lhs + 2; // 2="</"
int elem_end_tmp = elem_end_cur + name.length;
if (!Bry_.Eq_ci_a7(name, src, elem_end_cur, elem_end_tmp)) {
continue;
}
// verify "\s*>"
elem_end_cur = elem_end_tmp;
elem_end_cur = Bry_find_.Find_fwd_while(src, elem_end_cur, src_len, Byte_ascii.Space);
if (elem_end_cur == src_len) { // just "\s", but no ">"
break;
}
if (src[elem_end_cur] == Byte_ascii.Gt) {
elem_end_rhs = elem_end_cur + 1;
elem_end_found = true;
break;
}
}
if ( !no_more_closing_tag.Has(name)
&& elem_end_found) {
inner = Bry_.Mid(src, tag_end_pos + 1, elem_end_lhs);
i = elem_end_rhs;
tmp_bfr.Add_str_a7("<close>").Add_bry_escape_html(src, elem_end_lhs, elem_end_rhs).Add_str_a7("</close>");
close = tmp_bfr.To_bry_and_clear();
}
else {
// No end tag
if (xmlish_allow_missing_end_tag.Has(name)) {
// Let it run out to the end of the src.
inner = Bry_.Mid(src, tag_end_pos + 1);
i = src_len;
close = Bry_.Empty;
}
else {
// Don't match the tag, treat opening tag as literal and resume parsing.
i = tag_end_pos + 1;
accum.Add_bry_escape_html(src, tag_bgn_pos, tag_end_pos + 1);
// Cache results, otherwise we have O(N^2) performance for input like <foo><foo><foo>...
no_more_closing_tag.Add_if_dupe_use_nth(name, name);
continue;
}
}
}
// <includeonly> and <noinclude> just become <ignore> tags
if (ignored_elements.Has(name)) {
accum.Add_str_a7("<ignore>").Add_bry_escape_html(src, tag_bgn_pos, i).Add_str_a7("</ignore>");
continue;
}
accum.Add_str_a7("<ext>");
// PORTED:
// if ( $attrEnd <= $attrStart ) {
// $attr = '';
// } else {
// $attr = substr( $text, $attrStart, $attrEnd - $attrStart );
// }
accum.Add_str_a7("<name>").Add(name).Add_str_a7("</name>");
// Note that the attr element contains the whitespace between name and attribute,
// this is necessary for precise reconstruction during pre-save transform.
accum.Add_str_a7("<attr>");
if (atr_end > atr_bgn)
accum.Add_bry_escape_html(src, atr_bgn, atr_end);
accum.Add_str_a7("</attr>");
if (inner != null) {
accum.Add_str_a7("<inner>").Add_bry_escape_html(inner).Add_str_a7("</inner>");
}
accum.Add(close).Add_str_a7("</ext>");
}
else if (found == Found__line_bgn) {
// Is this the start of a heading?
// Line break belongs before the heading element in any case
if (fake_line_start) {
fake_line_start = false;
} else {
accum.Add(cur_char);
i++;
}
int count = XophpString.strspn_fwd__byte(src, Byte_ascii.Eq, i, 6, src_len);
if (count == 1 && find_equals) { // EX: "{{a|\n=b=\n"
// DWIM: This looks kind of like a name/value separator.
// Let's let the equals handler have it and break the
// potential heading. This is heuristic, but AFAICT the
// methods for completely correct disambiguation are very
// complex.
}
else if (count > 0) {
Xomw_prepro_piece piece = new Xomw_prepro_piece(Byte_ascii.Nl_bry, Byte_ascii.Nl_bry, count, i, false);
piece.Add_part(Bry_.Repeat(Byte_ascii.Eq, count));
stack.Push(piece);
accum = stack.Get_accum();
Xomw_prepro_flags flags = stack.Get_flags();
find_pipe = flags.Find_pipe;
find_equals = flags.Find_eq;
in_heading = flags.In_heading;
i += count;
}
}
else if (found == Found__line_end) {
Xomw_prepro_piece piece = stack.top;
// A heading must be open, otherwise \n wouldn't have been in the search list
if (!Bry_.Eq(piece.open, Byte_ascii.Nl_bry)) throw Err_.new_wo_type("assertion:piece must start with \\n");
Xomw_prepro_part part = piece.Get_current_part();
// Search back through the input to see if it has a proper close.
// Do this using the reversed String since the other solutions
// (end anchor, etc.) are inefficient.
int ws_len = XophpString.strspn_bwd__space_or_tab(src, src_len - i, -1);
int search_bgn = i - ws_len;
if (part.comment_end != -1 && search_bgn -1 == part.comment_end) {
// Comment found at line end
// Search for equals signs before the comment
search_bgn = part.visual_end;
search_bgn = Bry_find_.Find_bwd__while_space_or_tab(src, search_bgn, 0);
search_bgn -= XophpString.strspn_bwd__space_or_tab(src, search_bgn, -1);
}
int count = piece.count;
int eq_len = XophpString.strspn_bwd__byte(src, Byte_ascii.Eq, search_bgn, -1);
byte[] element = Bry_.Empty;
if (eq_len > 0) {
if (search_bgn - eq_len == piece.start_pos) {
// This is just a single String of equals signs on its own line
// Replicate the doHeadings behavior /={count}(.+)={count}/
// First find out how many equals signs there really are (don't stop at 6)
count = eq_len;
if (count < 3) {
count = 0;
}
else {
count = (count - 1) / 2;
if (count > 6) count = 6;
}
}
else {
if (eq_len < count) count = eq_len; // PORTED: $count = min( $equalsLength, $count );
}
if (count > 0) {
// Normal match, output <h>
element = tmp_bfr.Add_str_a7("<h level=\"").Add_int_variable(count).Add_str_a7("\" i=\"").Add_int_variable(heading_index).Add_str_a7("\">").Add_bfr_and_preserve(accum).Add_str_a7("</h>").To_bry_and_clear();
heading_index++;
} else {
// Single equals sign on its own line, count=0
element = accum.To_bry();
}
}
else {
// No match, no <h>, just pass down the inner src
element = accum.To_bry();
}
// Unwind the stack
stack.Pop();
accum = stack.Get_accum();
Xomw_prepro_flags flags = stack.Get_flags();
find_pipe = flags.Find_pipe;
find_equals = flags.Find_eq;
in_heading = flags.In_heading;
// Append the result to the enclosing accumulator
accum.Add(element);
// Note that we do NOT increment the input pointer.
// This is because the closing linebreak could be the opening linebreak of
// another heading. Infinite loops are avoided because the next iteration MUST
// hit the heading open case above, which unconditionally increments the
// input pointer.
}
else if (found == Found__open) {
// count opening brace characters
int count = XophpString.strspn_fwd__byte(src, cur_char[0], i, -1, src_len); // NOTE: don't know how MediaWiki will handle "-{"
// we need to add to stack only if opening brace count is enough for one of the rules
if (count >= rule.min) {
// Add it to the stack
Xomw_prepro_piece piece = new Xomw_prepro_piece(cur_char, rule.end, count, -1, i > 0 && src[i - 1] == Byte_ascii.Nl);
stack.Push(piece);
accum = stack.Get_accum();
Xomw_prepro_flags flags = stack.Get_flags();
find_pipe = flags.Find_pipe;
find_equals = flags.Find_eq;
in_heading = flags.In_heading;
}
else {
// Add literal brace(s)
for (int j = 0; j < count; j++)
accum.Add_bry_escape_html(cur_char);
}
i += count;
}
else if (found == Found__close) {
Xomw_prepro_piece piece = stack.top;
// lets check if there are enough characters for closing brace
int max_count = piece.count;
int count = XophpString.strspn_fwd__byte(src, cur_char[0], i, max_count, src_len);
// check for maximum matching characters (if there are 5 closing characters, we will probably need only 3 - depending on the rules)
rule = Get_rule(piece.open);
int matching_count = -1;
if (count > rule.max) {
// The specified maximum exists in the callback array, unless the caller
// has made an error
matching_count = rule.max;
}
else {
// Count is less than the maximum
// Skip any gaps in the callback array to find the true largest match
// Need to use array_key_exists not isset because the callback can be null
matching_count = count;
while (matching_count > 0 && !rule.Names_exist(matching_count)) {
matching_count--;
}
}
if (matching_count <= 0) {
// No matching element found in callback array
// Output a literal closing brace and continue
for (int j = 0; j < count; j++)
accum.Add_bry_escape_html(cur_char);
i += count;
continue;
}
int name_type = rule.names[matching_count];
byte[] element = null;
if (name_type == Xomw_prepro_rule.Name__null) {
// No element, just literal text
tmp_bfr.Add(piece.Break_syntax(tmp_bfr, matching_count));
element = tmp_bfr.Add(Bry_.Repeat_bry(rule.end, matching_count)).To_bry_and_clear();
}
else {
// Create XML element
// Note: $parts is already XML, does not need to be encoded further
List_adp parts = piece.parts;
byte[] title = ((Xomw_prepro_part)parts.Get_at(0)).bfr.To_bry_and_clear();
parts.Del_at(0);
// The invocation is at the start of the line if lineStart is set in
// the stack, and all opening brackets are used up.
byte[] attr = null;
if (max_count == matching_count && piece.line_start) { // RELIC:!empty( $piece->lineStart )
attr = Bry_.new_a7(" lineStart=\"1\"");
}
else {
attr = Bry_.Empty;
}
byte[] name_bry = Xomw_prepro_rule.Name(name_type);
tmp_bfr.Add_str_a7("<").Add(name_bry).Add(attr).Add_str_a7(">");
tmp_bfr.Add_str_a7("<title>").Add(title).Add_str_a7("</title>");
int arg_idx = 1;
int parts_len = parts.Len();
for (int j = 0; j < parts_len; j++) {
Xomw_prepro_part part = (Xomw_prepro_part)parts.Get_at(j);
if (part.Eqpos != -1) {
Bry_bfr part_bfr = part.bfr;
byte[] part_bfr_bry = part_bfr.Bfr();
tmp_bfr.Add_str_a7("<part><name>").Add_mid(part_bfr_bry, 0, part.Eqpos);
tmp_bfr.Add_str_a7("</name>=<value>").Add_mid(part_bfr_bry, part.Eqpos + 1, part_bfr.Len());
tmp_bfr.Add_str_a7("</value></part>");
}
else {
tmp_bfr.Add_str_a7("<part><name index=\"").Add_int_variable(arg_idx).Add_str_a7("\" /><value>").Add(part.bfr.To_bry()).Add_str_a7("</value></part>");
arg_idx++;
}
}
element = tmp_bfr.Add_str_a7("</").Add(name_bry).Add_str_a7(">").To_bry_and_clear();
}
// Advance input pointer
i += matching_count;
// Unwind the stack
stack.Pop();
accum = stack.Get_accum();
// Re-add the old stack element if it still has unmatched opening characters remaining
if (matching_count < piece.count) {
piece.Parts__renew(); // PORTED: piece.parts = [ new PPDPart ];
piece.count -= matching_count;
// do we still qualify for any callback with remaining count?
int min = Get_rule(piece.open).min;
if (piece.count >= min) {
stack.Push(piece);
accum = stack.Get_accum();
}
else {
accum.Add(Bry_.Repeat_bry(piece.open, piece.count));
}
}
Xomw_prepro_flags flags = stack.Get_flags();
find_pipe = flags.Find_pipe;
find_equals = flags.Find_eq;
in_heading = flags.In_heading;
// Add XML element to the enclosing accumulator
accum.Add(element);
}
else if (found == Found__pipe) {
find_equals = true; // shortcut for getFlags()
stack.Add_part(Bry_.Empty);
accum = stack.Get_accum();
i++;
}
else if (found == Found__equals) {
find_equals = false; // shortcut for getFlags()
stack.Get_current_part().Eqpos = accum.Len();
accum.Add_byte(Byte_ascii.Eq);
i++;
}
}
// Output any remaining unclosed brackets
Bry_bfr root_accum = stack.Get_root_accum();
int stack_len = stack.stack.Len();
for (int j = 0; j < stack_len; j++) {
Xomw_prepro_piece piece = (Xomw_prepro_piece)stack.stack.Get_at(j);
root_accum.Add(piece.Break_syntax(tmp_bfr, -1));
}
root_accum.Add_str_a7("</root>");
return root_accum.To_bry_and_clear();
}
private Xomw_prepro_rule Get_rule(byte[] bry) {
if (Bry_.Eq(bry, rule_curly.bgn)) return rule_curly;
else if (Bry_.Eq(bry, rule_brack.bgn)) return rule_brack;
else if (Bry_.Eq(bry, rule_langv.bgn)) return rule_langv;
else throw Err_.new_unhandled(bry);
}
private static final int
Found__line_bgn = 0
, Found__line_end = 1
, Found__pipe = 2
, Found__equals = 3
, Found__angle = 4
, Found__close = 5
, Found__open = 6
;
private static final Xomw_prepro_rule
rule_curly = new Xomw_prepro_rule(Bry_.new_a7("{"), Bry_.new_a7("}") , 2, 3, new int[] {Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__tmpl, Xomw_prepro_rule.Name__targ})
, rule_brack = new Xomw_prepro_rule(Bry_.new_a7("["), Bry_.new_a7("]") , 2, 2, new int[] {Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__null})
, rule_langv = new Xomw_prepro_rule(Bry_.new_a7("-{"), Bry_.new_a7("}-"), 1, 1, new int[] {Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__null})
;
private static final byte[]
Bry__only_include_bgn = Bry_.new_a7("<onlyinclude>")
, Bry__only_include_end = Bry_.new_a7("</onlyinclude>")
, Bry__comment_bgn = Bry_.new_a7("<!--")
, Bry__comment_end = Bry_.new_a7("-->")
, Bry__escaped_lt = Bry_.new_a7("<")
, Bry__end_lhs = Bry_.new_a7("</")
;
private static final int Len__only_include_end = Bry__only_include_end.length;
private static final Btrie_slim_mgr cur_char_trie = Cur_char_trie__new();
private static final Ordered_hash
ignored_tags_y = Ordered_hash_.New_bry().Add_many_str("includeonly", "/includeonly")
, ignored_tags_n = Ordered_hash_.New_bry().Add_many_str("noinclude", "/noinclude", "onlyinclude", "/onlyinclude");
private static final Hash_adp_bry
ignored_elements__y = Hash_adp_bry.cs().Add_many_str("noinclude")
, ignored_elements__n = Hash_adp_bry.cs().Add_many_str("includeonly");
private static Btrie_slim_mgr Cur_char_trie__new() {
Btrie_slim_mgr rv = Btrie_slim_mgr.ci_a7();
String[] ary = new String[] {"|", "=", "<", "\n", "{", "[", "-{", "}", "]"};
for (String str : ary) {
byte[] bry = Bry_.new_a7(str);
rv.Add_obj(bry, new Xomw_prepro_curchar_itm(bry, bry[0]));
}
// handle "}-" separately
byte[] langv_end = Bry_.new_a7("}-");
rv.Add_obj(langv_end, new Xomw_prepro_curchar_itm(langv_end, Byte_ascii.At));
return rv;
}
}