package org.basex.query.util.format;
import static org.basex.util.Token.*;
/**
* This class assembles methods and variables that are used by more than one
* formatter class. Some numberings have been adopted from Michael H. Kay's
* AbstractNumberer class in Saxon.
*
* @author BaseX Team 2005-12, BSD License
* @author Christian Gruen
*/
abstract class FormatUtil {
  /**
   * Code points of the zero digit of every supported decimal digit family.
   * {@link #zeroes(int)} treats each entry together with the nine code points
   * that follow it as one family.
   */
  private static final int[] ZEROES = {
    0x30, 0x660, 0x6F0, 0x7C0, 0x966, 0x9E6, 0xA66, 0xAE6, 0xB66, 0xBE6, 0xC66,
    0xCE6, 0xD66, 0xE50, 0xED0, 0xF20, 0x1040, 0x1090, 0x17E0, 0x1810, 0x1946,
    0x19D0, 0x1A80, 0x1A90, 0x1B50, 0x1BB0, 0x1C40, 0x1C50, 0xA620, 0xA8D0,
    0xA900, 0xA9D0, 0xAA50, 0xABF0, 0xFF10, 0x104A0, 0x11066, 0x1D7CE, 0x1D7D8,
    0x1D7E2, 0x1D7EC, 0x1D7F6
  };

  /**
   * Kanji numerals: the digits zero to nine (index 0-9), followed by the
   * characters for ten, hundred, thousand, ten thousand, hundred million and
   * trillion.
   */
  static final int[] KANJI = {
    0x3007, 0x4e00, 0x4e8c, 0x4e09, 0x56db, 0x4e94, 0x516d, 0x4e03, 0x516b,
    0x4e5d, 0x5341, 0x767E, 0x5343, 0x4e07, 0x5104, 0x5146
  };

  /** Roman numerals for the ones digit (index 0-9; index 0 is empty). */
  static final byte[][] ROMANI =
    tokens("", "I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX");
  /** Roman numerals for the tens digit (index 0-9; index 0 is empty). */
  static final byte[][] ROMANX =
    tokens("", "X", "XX", "XXX", "XL", "L", "LX", "LXX", "LXXX", "XC");
  /** Roman numerals for the hundreds digit (index 0-9; index 0 is empty). */
  static final byte[][] ROMANC =
    tokens("", "C", "CC", "CCC", "CD", "D", "DC", "DCC", "DCCC", "CM");
  /** Roman numerals for the thousands digit (index 0-3; index 0 is empty). */
  static final byte[][] ROMANM = tokens("", "M", "MM", "MMM");

  /**
   * Alphabet sequences. {@link #sequence(int)} identifies a sequence by its
   * first character, so the first characters must be pairwise distinct.
   */
  private static final String[] SEQS = {
    // Latin numbering
    "abcdefghijklmnopqrstuvwxyz",
    // Latin numbering (upper case)
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
    // Greek numbering: the 24 letters alpha to omega. The final-form sigma
    // (U+03C2) is a positional variant, not a separate letter, and is omitted.
    "\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba" +
    "\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5" +
    "\u03c6\u03c7\u03c8\u03c9",
    // Greek numbering (upper case). U+03A2 is a reserved, unassigned code
    // point and must not be part of the sequence.
    "\u0391\u0392\u0393\u0394\u0395\u0396\u0397\u0398\u0399\u039a" +
    "\u039b\u039c\u039d\u039e\u039f\u03a0\u03a1\u03a3\u03a4\u03a5" +
    "\u03a6\u03a7\u03a8\u03a9",
    // Hebrew numbering
    "\u05d0\u05d1\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7\u05d8\u05d9\u05db" +
    "\u05dc\u05de\u05e0\u05e1\u05e2\u05e4\u05e6\u05e7\u05e8\u05e9\u05ea",
    // Cyrillic numbering, based on Dmitry Kirsanov's sequence in Saxon.
    // U+0441 is followed by U+0442; the adopted sequence listed U+0441 twice.
    "\u0430\u0431\u0432\u0433\u0434\u0435\u0436\u0437\u0438\u043a" +
    "\u043b\u043c\u043d\u043e\u043f\u0440\u0441\u0442\u0443\u0444" +
    "\u0445\u0446\u0447\u0448\u0449\u044b\u044d\u044e\u044f",
    // Cyrillic numbering (upper case); U+0421 is followed by U+0422.
    "\u0410\u0411\u0412\u0413\u0414\u0415\u0416\u0417\u0418\u041a" +
    "\u041b\u041c\u041d\u041e\u041f\u0420\u0421\u0422\u0423\u0424" +
    "\u0425\u0426\u0427\u0428\u0429\u042b\u042d\u042e\u042f",
    // Hiragana A numbering, based on Murakami Shinyu's sequences in Saxon.
    "\u3042\u3044\u3046\u3048\u304a\u304b\u304d\u304f\u3051\u3053" +
    "\u3055\u3057\u3059\u305b\u305d\u305f\u3061\u3064\u3066\u3068" +
    "\u306a\u306b\u306c\u306d\u306e\u306f\u3072\u3075\u3078\u307b" +
    "\u307e\u307f\u3080\u3081\u3082\u3084\u3086\u3088\u3089\u308a" +
    "\u308b\u308c\u308d\u308f\u3092\u3093",
    // Katakana A numbering.
    "\u30a2\u30a4\u30a6\u30a8\u30aa\u30ab\u30ad\u30af\u30b1\u30b3" +
    "\u30b5\u30b7\u30b9\u30bb\u30bd\u30bf\u30c1\u30c4\u30c6\u30c8" +
    "\u30ca\u30cb\u30cc\u30cd\u30ce\u30cf\u30d2\u30d5\u30d8\u30db" +
    "\u30de\u30df\u30e0\u30e1\u30e2\u30e4\u30e6\u30e8\u30e9\u30ea" +
    "\u30eb\u30ec\u30ed\u30ef\u30f2\u30f3",
    // Hiragana I numbering.
    "\u3044\u308d\u306f\u306b\u307b\u3078\u3068\u3061\u308a\u306c" +
    "\u308b\u3092\u308f\u304b\u3088\u305f\u308c\u305d\u3064\u306d" +
    "\u306a\u3089\u3080\u3046\u3090\u306e\u304a\u304f\u3084\u307e" +
    "\u3051\u3075\u3053\u3048\u3066\u3042\u3055\u304d\u3086\u3081" +
    "\u307f\u3057\u3091\u3072\u3082\u305b\u3059",
    // Katakana I numbering.
    "\u30a4\u30ed\u30cf\u30cb\u30db\u30d8\u30c8\u30c1\u30ea\u30cc" +
    "\u30eb\u30f2\u30ef\u30ab\u30e8\u30bf\u30ec\u30bd\u30c4\u30cd" +
    "\u30ca\u30e9\u30e0\u30a6\u30f0\u30ce\u30aa\u30af\u30e4\u30de" +
    "\u30b1\u30d5\u30b3\u30a8\u30c6\u30a2\u30b5\u30ad\u30e6\u30e1" +
    "\u30df\u30b7\u30f1\u30d2\u30e2\u30bb\u30b9"
  };

  /**
   * Returns the alphabet sequence whose first character equals the specified
   * character. Only the first character of each sequence is compared.
   * @param ch character to be checked
   * @return character sequence, or {@code null} if no sequence starts with
   *   the specified character
   */
  static String sequence(final int ch) {
    for(final String seq : SEQS) {
      if(ch == seq.charAt(0)) {
        return seq;
      }
    }
    return null;
  }

  /** Cases. */
  protected enum Case {
    /** Lower case. */ LOWER,
    /** Upper case. */ UPPER,
    /** Standard. */ STANDARD
  }

  /**
   * Returns the zero base for the specified code point, i.e., the zero digit
   * of the digit family that contains the code point.
   * @param ch character
   * @return zero base, or {@code -1} if the code point is not one of the ten
   *   digits of any known family
   */
  static int zeroes(final int ch) {
    for(final int zero : ZEROES) {
      // a digit family spans the zero digit and the nine code points after it
      if(ch >= zero && ch <= zero + 9) {
        return zero;
      }
    }
    return -1;
  }

  /**
   * Returns the character at the specified position, or {@code 0} if the
   * specified position is outside the range of the input array.
   * @param in input
   * @param pos position (offset into the byte array)
   * @return character, or {@code 0}
   */
  static int ch(final byte[] in, final int pos) {
    final boolean inside = pos >= 0 && pos < in.length;
    return inside ? cp(in, pos) : 0;
  }
}