package org.basex.query.util.format;

import static org.basex.query.util.Err.*;
import static org.basex.util.Token.*;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.basex.query.QueryException;
import org.basex.util.InputInfo;
import org.basex.util.TokenBuilder;

/**
 * Format parser for integers and dates.
 *
 * <p>The constructor parses a picture string and stores the result in the
 * package-visible fields ({@link #cs}, {@link #primary}, {@link #digit},
 * {@link #ordinal}, {@link #min}, {@link #max}).</p>
 *
 * @author BaseX Team 2005-12, BSD License
 * @author Christian Gruen
 */
public final class FormatParser extends FormatUtil {
  /** Width pattern: min-width ("-" max-width)?; matched against the text after the comma. */
  private static final Pattern WIDTH = Pattern.compile("(\\*|\\d+)(-(\\*|\\d+))?");

  /** Input information. */
  private final InputInfo input;
  /** Input to be parsed. */
  private final byte[] pic;
  /** Default modifier. */
  private final byte[] def;
  /** Extended format (used for dates). */
  private final boolean ext;

  /** Case. */
  Case cs;
  /** Primary format token. */
  byte[] primary;
  /** Primary format or mandatory digit. */
  int digit;
  /** Ordinal suffix; {@code null} if not specified. */
  byte[] ordinal;
  /** Minimum width. */
  int min;
  /** Maximum width. */
  int max = Integer.MAX_VALUE;

  /**
   * Constructor. Parses the picture, splits off an optional width modifier
   * (extended format only) and derives case, primary token and digit.
   * @param ii input info
   * @param p info picture
   * @param df default presentation modifier (may be {@code null})
   * @throws QueryException query exception
   */
  public FormatParser(final InputInfo ii, final byte[] p, final byte[] df)
      throws QueryException {

    input = ii;
    // NOTE(review): an empty picture combined with a null default leaves pic null,
    // and mod() dereferences it; presumably callers reject that combination - confirm
    pic = p.length != 0 ? p : df;
    ext = df != null;
    def = ext ? df : ONE;

    byte[] pm = mod();
    if(ext) {
      // extract and check width modifier (text after the last comma)
      final int w = lastIndexOf(pm, ',');
      if(w != -1) {
        final byte[] wd = substring(pm, w + 1);
        pm = substring(pm, 0, w);
        final Matcher m = WIDTH.matcher(string(wd));
        // the complete modifier must conform to the pattern; a partial hit
        // (e.g. a digit surrounded by other characters) is rejected
        if(!m.matches()) PICDATE.thrw(ii, wd);
        // Integer.MIN_VALUE is treated as "no numeric width" (e.g. for "*"):
        // the field then keeps its default value
        int i = toInt(m.group(1));
        if(i != Integer.MIN_VALUE) min = i;
        final String mc = m.group(3);
        i = mc != null ? toInt(mc) : Integer.MIN_VALUE;
        if(i != Integer.MIN_VALUE) max = i;
      }
    }

    // choose first character and case
    if(pm.length == 0) pm = ONE;
    // more than one character: standard case; otherwise, the 0x20 bit of the
    // single character decides between upper and lower case
    cs = cl(pm, 0) < pm.length ? Case.STANDARD :
      (ch(pm, 0) & ' ') == 0 ? Case.UPPER : Case.LOWER;
    primary = lc(pm);
    // digit may already have been set by mod() (mandatory-digit-sign)
    if(digit == 0) digit = ch(primary, 0);
  }

  /**
   * Returns a presentation modifier. As side effects, {@link #digit} is set if
   * a mandatory-digit-sign is found, and {@link #ordinal} is set if an ordinal
   * modifier is specified. A trailing width modifier (starting with a comma)
   * is appended unchecked to the returned token.
   * @return presentation modifier
   * @throws QueryException query exception
   */
  private byte[] mod() throws QueryException {
    final int l = pic.length;
    // final presentation modifier
    byte[] pm = null;
    // current offset
    int pos = cl(pic, 0);
    // proposed presentation modifier
    int ch = ch(pic, 0);

    if(sequence(ch) != null) {
      // Latin, Greek and other alphabets
    } else if(ch >= '\u2460' && ch <= '\u249b') {
      // circled, parenthesized or full stop digits
    } else if(ch == KANJI[1]) {
      // Japanese numbering
    } else if((ch | ' ') == 'i') {
      // Roman sequence
    } else if((ch | ' ') == 'w' || (ch | ' ') == 'n' && ext) {
      // word-wise output (incl. title-case check);
      // 'n'/'N' is only accepted in the extended (date) format
      if((ch & ' ') == 0 && ch(pic, pos) == (ch | ' ')) pos += cl(pic, pos);
    } else if(ch == ',') {
      // width modifier: the primary token is empty
      pos = 0;
    } else {
      // mandatory-digit-sign
      int z = -1;
      // true while the previous character was a grouping separator
      boolean group = false;
      for(pos = 0; pos < l; pos += cl(pic, pos)) {
        ch = ch(pic, pos);
        if(z == -1) {
          // no mandatory digit found so far
          z = zeroes(ch);
          if(z != -1) {
            digit = z;
            group = false;
          } else if(ch == '#') {
            group = false;
          } else if(Character.isLetter(ch)) {
            pm = def;
            pos += cl(pic, pos);
            break;
          } else {
            if(pos == 0) GROUPSTART.thrw(input, pic);
            if(group) GROUPADJ.thrw(input, pic);
            group = true;
          }
        } else {
          // mandatory digit was found: z is the zero digit of its family
          if(Character.isLetter(ch)) {
            pm = substring(pic, 0, pos);
            break;
          } else if(ch >= z && ch <= z + 9) {
            group = false;
          } else {
            if(zeroes(ch) != -1) DIFFMAND.thrw(input, pic);
            if(ch == '#') OPTAFTER.thrw(input, pic);
            if(group) GROUPADJ.thrw(input, pic);
            group = true;
          }
        }
      }
      if(z == -1) NOMAND.thrw(input, pic);
      if(group) GROUPEND.thrw(input, pic);
    }

    // if necessary, extract primary format token from the original string
    if(pm == null) pm = substring(pic, 0, pos);

    // check for optional format modifier
    if(pos < l) {
      if(ch(pic, pos) == 'o') {
        final TokenBuilder tb = new TokenBuilder();
        if(ch(pic, ++pos) == '(') {
          // collect the ordinal suffix; advance by the byte length of each
          // character, so that multi-byte characters are read only once
          ++pos;
          while(pos < l && ch(pic, pos) != ')') {
            tb.add(ch(pic, pos));
            pos += cl(pic, pos);
          }
          // ordinal isn't closed by a parenthesis
          if(pos >= l) ORDCLOSED.thrw(input, pic);
          // skip closing parenthesis
          ++pos;
        }
        ordinal = tb.finish();
      } else if(ch(pic, pos) == 't') {
        // traditional numbering (ignored)
        ++pos;
      }

      // check for remaining input: only a width modifier is allowed
      if(pos < l) {
        // invalid remaining input
        if(ch(pic, pos) != ',') PICCOMP.thrw(input, pic);
        pm = concat(pm, substring(pic, pos));
      }
    }
    return pm;
  }
}