package com.laytonsmith.PureUtilities; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import java.io.Reader; import java.util.LinkedHashMap; /** * This class is meant to be a nearly drop in replacement to the * {@link java.util.Properties} class, but it extends * {@code LinkedHashMap<String, String>} instead of * {@code Hashtable<Object, Object>}. It is intended for string * based properties files, as opposed to object based properties. */ public class PropertiesManager extends LinkedHashMap<String, String>{ public PropertiesManager() { this(""); } /** * <p> * Properties are processed in terms of lines. There are two kinds of line, * <i>natural lines</i> and <i>logical lines</i>. A natural line is defined * as a line of characters that is terminated either by a set of line * terminator characters ( * <code>\n</code> or * <code>\r</code> or * <code>\r\n</code>) or by the end of the stream. A natural line may be * either a blank line, a comment line, or hold all or some of a key-element * pair. A logical line holds all the data of a key-element pair, which may * be spread out across several adjacent natural lines by escaping the line * terminator sequence with a backslash character * <code>\</code>. Note that a comment line cannot be extended in this * manner; every natural line that is a comment must have its own comment * indicator, as described below. Lines are read from input until the end of * the stream is reached. * * <p> * A natural line that contains only white space characters is considered * blank and is ignored. A comment line has an ASCII * <code>'#'</code> or * <code>'!'</code> as its first non-white space character; comment lines * are also ignored and do not encode key-element information. In addition * to line terminators, this format considers the characters space * ( * <code>' '</code>, * <code>'\u0020'</code>), tab * ( * <code>'\t'</code>, * <code>'\u0009'</code>), and form feed * ( * <code>'\f'</code>, * <code>'\u000C'</code>) to be white space. * * <p> * If a logical line is spread across several natural lines, the backslash * escaping the line terminator sequence, the line terminator sequence, and * any white space at the start of the following line have no affect on the * key or element values. The remainder of the discussion of key and element * parsing (when loading) will assume all the characters constituting the * key and element appear on a single natural line after line continuation * characters have been removed. Note that it is <i>not</i> sufficient to * only examine the character preceding a line terminator sequence to decide * if the line terminator is escaped; there must be an odd number of * contiguous backslashes for the line terminator to be escaped. Since the * input is processed from left to right, a non-zero even number of * 2<i>n</i> contiguous backslashes before a line terminator (or elsewhere) * encodes <i>n</i> * backslashes after escape processing. * * <p> * The key contains all of the characters in the line starting with the * first non-white space character and up to, but not including, the first * unescaped * <code>'='</code>, * <code>':'</code>, or white space character other than a line terminator. * All of these key termination characters may be included in the key by * escaping them with a preceding backslash character; for example,<p> * * <code>\:\=</code><p> * * would be the two-character key * <code>":="</code>. Line terminator characters can be included using * <code>\r</code> and * <code>\n</code> escape sequences. Any white space after the key is * skipped; if the first non-white space character after the key is * <code>'='</code> or * <code>':'</code>, then it is ignored and any white space characters after * it are also skipped. All remaining characters on the line become part of * the associated element string; if there are no remaining characters, the * element is the empty string * <code>""</code>. Once the raw character sequences constituting * the key and element are identified, escape processing is performed as * described above. * * <p> * As an example, each of the following three lines specifies the key * <code>"Truth"</code> and the associated element value * <code>"Beauty"</code>: * <p> * <pre> * Truth = Beauty * Truth:Beauty * Truth :Beauty * </pre> As another example, the following three lines specify a single * property: * <p> * <pre> * fruits apple, banana, pear, \ * cantaloupe, watermelon, \ * kiwi, mango * </pre> The key is * <code>"fruits"</code> and the associated element is: * <p> * <pre>"apple, banana, pear, cantaloupe, watermelon, kiwi, mango"</pre> * Note that a space appears before each * <code>\</code> so that a space will appear after each comma in the final * result; the * <code>\</code>, line terminator, and leading white space on the * continuation line are merely discarded and are <i>not</i> replaced by one * or more other characters. * <p> * As a third example, the line: * <p> * <pre>cheeses * </pre> specifies that the key is * <code>"cheeses"</code> and the associated element is the empty string * <code>""</code>.<p> * <p> * * <a name="unicodeescapes"></a> * Characters in keys and elements can be represented in escape sequences * similar to those used for character and string literals (see <a * href="http://java.sun.com/docs/books/jls/third_edition/html/lexical.html#3.3">§3.3</a> * and <a * href="http://java.sun.com/docs/books/jls/third_edition/html/lexical.html#3.10.6">§3.10.6</a> * of the <i>Java Language Specification</i>). * * The differences from the character escape sequences and Unicode escapes * used for characters and strings are: * * <ul> * <li> Octal escapes are not recognized. * * <li> The character sequence * <code>\b</code> does <i>not</i> * represent a backspace character. * * <li> The method does not treat a backslash character, * <code>\</code>, before a non-valid escape character as an error; the * backslash is silently dropped. For example, in a Java string the sequence * <code>"\z"</code> would cause a compile time error. In contrast, this * method silently drops the backslash. Therefore, this method treats the * two character sequence * <code>"\b"</code> as equivalent to the single character * <code>'b'</code>. * * <li> Escapes are not necessary for single and double quotes; however, by * the rule above, single and double quote characters preceded by a * backslash still yield single and double quote characters, respectively. * * <li> Only a single 'u' character is allowed in a Uniocde escape sequence. */ public PropertiesManager(String properties) { try { InputStream is = new ByteArrayInputStream(properties.getBytes("UTF-8")); load(new LineReader(is)); } catch (IOException ex) { //Either encoding exception or IO error. Neither should actually happen here. throw new Error(ex); } } public PropertiesManager(InputStream stream) throws IOException{ load(new LineReader(stream)); } private void load(LineReader lr) throws IOException { char[] convtBuf = new char[1024]; int limit; int keyLen; int valueStart; char c; boolean hasSep; boolean precedingBackslash; while ((limit = lr.readLine()) >= 0) { c = 0; keyLen = 0; valueStart = limit; hasSep = false; //System.out.println("line=<" + new String(lineBuf, 0, limit) + ">"); precedingBackslash = false; while (keyLen < limit) { c = lr.lineBuf[keyLen]; //need check if escaped. if ((c == '=' || c == ':') && !precedingBackslash) { valueStart = keyLen + 1; hasSep = true; break; } else if ((c == ' ' || c == '\t' || c == '\f') && !precedingBackslash) { valueStart = keyLen + 1; break; } if (c == '\\') { precedingBackslash = !precedingBackslash; } else { precedingBackslash = false; } keyLen++; } while (valueStart < limit) { c = lr.lineBuf[valueStart]; if (c != ' ' && c != '\t' && c != '\f') { if (!hasSep && (c == '=' || c == ':')) { hasSep = true; } else { break; } } valueStart++; } String key = loadConvert(lr.lineBuf, 0, keyLen, convtBuf); String value = loadConvert(lr.lineBuf, valueStart, limit - valueStart, convtBuf); put(key, value); } } /* Read in a "logical line" from an InputStream/Reader, skip all comment * and blank lines and filter out those leading whitespace characters * (\u0020, \u0009 and \u000c) from the beginning of a "natural line". * Method returns the char length of the "logical line" and stores * the line in "lineBuf". */ class LineReader { public LineReader(InputStream inStream) { this.inStream = inStream; inByteBuf = new byte[8192]; } public LineReader(Reader reader) { this.reader = reader; inCharBuf = new char[8192]; } byte[] inByteBuf; char[] inCharBuf; char[] lineBuf = new char[1024]; int inLimit = 0; int inOff = 0; InputStream inStream; Reader reader; int readLine() throws IOException { int len = 0; char c = 0; boolean skipWhiteSpace = true; boolean isCommentLine = false; boolean isNewLine = true; boolean appendedLineBegin = false; boolean precedingBackslash = false; boolean skipLF = false; while (true) { if (inOff >= inLimit) { inLimit = (inStream==null)?reader.read(inCharBuf) :inStream.read(inByteBuf); inOff = 0; if (inLimit <= 0) { if (len == 0 || isCommentLine) { return -1; } return len; } } if (inStream != null) { //The line below is equivalent to calling a //ISO8859-1 decoder. c = (char) (0xff & inByteBuf[inOff++]); } else { c = inCharBuf[inOff++]; } if (skipLF) { skipLF = false; if (c == '\n') { continue; } } if (skipWhiteSpace) { if (c == ' ' || c == '\t' || c == '\f') { continue; } if (!appendedLineBegin && (c == '\r' || c == '\n')) { continue; } skipWhiteSpace = false; appendedLineBegin = false; } if (isNewLine) { isNewLine = false; if (c == '#' || c == '!') { isCommentLine = true; continue; } } if (c != '\n' && c != '\r') { lineBuf[len++] = c; if (len == lineBuf.length) { int newLength = lineBuf.length * 2; if (newLength < 0) { newLength = Integer.MAX_VALUE; } char[] buf = new char[newLength]; System.arraycopy(lineBuf, 0, buf, 0, lineBuf.length); lineBuf = buf; } //flip the preceding backslash flag if (c == '\\') { precedingBackslash = !precedingBackslash; } else { precedingBackslash = false; } } else { // reached EOL if (isCommentLine || len == 0) { isCommentLine = false; isNewLine = true; skipWhiteSpace = true; len = 0; continue; } if (inOff >= inLimit) { inLimit = (inStream==null) ?reader.read(inCharBuf) :inStream.read(inByteBuf); inOff = 0; if (inLimit <= 0) { return len; } } if (precedingBackslash) { len -= 1; //skip the leading whitespace characters in following line skipWhiteSpace = true; appendedLineBegin = true; precedingBackslash = false; if (c == '\r') { skipLF = true; } } else { return len; } } } } } /* * Converts encoded \uxxxx to unicode chars * and changes special saved chars to their original forms */ private String loadConvert (char[] in, int off, int len, char[] convtBuf) { if (convtBuf.length < len) { int newLen = len * 2; if (newLen < 0) { newLen = Integer.MAX_VALUE; } convtBuf = new char[newLen]; } char aChar; char[] out = convtBuf; int outLen = 0; int end = off + len; while (off < end) { aChar = in[off++]; if (aChar == '\\') { aChar = in[off++]; if(aChar == 'u') { // Read the xxxx int value=0; for (int i=0; i<4; i++) { aChar = in[off++]; switch (aChar) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': value = (value << 4) + aChar - '0'; break; case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': value = (value << 4) + 10 + aChar - 'a'; break; case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': value = (value << 4) + 10 + aChar - 'A'; break; default: throw new IllegalArgumentException( "Malformed \\uxxxx encoding."); } } out[outLen++] = (char)value; } else { if (aChar == 't') aChar = '\t'; else if (aChar == 'r') aChar = '\r'; else if (aChar == 'n') aChar = '\n'; else if (aChar == 'f') aChar = '\f'; out[outLen++] = aChar; } } else { out[outLen++] = aChar; } } return new String (out, 0, outLen); } }