// Tokenizer.java example
//
// Explorer
// milton2-master
// Copyright (c) 2003-2004 Brian Wellington (bwelling@xbill.org)
//
// Copyright (C) 2003-2004 Nominum, Inc.
// 
// Permission to use, copy, modify, and distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND NOMINUM DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL NOMINUM BE LIABLE FOR ANY
// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
// OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
//

package io.milton.dns.record;

import io.milton.dns.Address;
import io.milton.dns.Name;
import io.milton.dns.TextParseException;
import io.milton.dns.utils.base16;
import io.milton.dns.utils.base32;
import io.milton.dns.utils.base64;

import java.io.*;
import java.net.*;

import io.milton.dns.utils.*;

/**
 * Tokenizer is used to parse DNS records and zones from text format.
 *
 * @author Brian Wellington
 * @author Bob Halley
 */

public class Tokenizer {

/**
 * Characters that terminate an unquoted token: space, tab, newline, the
 * comment marker ';', both parentheses and the double quote.
 */
private static final String delim = " \t\n;()\"";

/** The only delimiter recognised while inside a quoted string. */
private static final String quotes = "\"";

/* Token type codes; one of these is stored in Token.type by get(). */

/** End of file */
public static final int EOF		= 0;

/** End of line */
public static final int EOL		= 1;

/** Whitespace; only returned when wantWhitespace is set */
public static final int WHITESPACE	= 2;

/** An identifier (unquoted string) */
public static final int IDENTIFIER	= 3;

/** A quoted string */
public static final int QUOTED_STRING	= 4;

/** A comment; only returned when wantComment is set */
public static final int COMMENT		= 5;

/** Input source; wrapped so that bytes can be pushed back after look-ahead. */
private PushbackInputStream is;
/** True when unget() was called and 'current' must be re-delivered by get(). */
private boolean ungottenToken;
/** Number of currently open parentheses; while > 0 newlines count as whitespace. */
private int multiline;
/** True while the tokenizer is inside a double-quoted string. */
private boolean quoting;
/** The delimiter set currently in effect: 'delim' normally, 'quotes' while quoting. */
private String delimiters;
/** The single Token instance that get() fills in and returns on every call. */
private Token current;
/** Scratch buffer in which get() accumulates the text of the token being read. */
private StringBuffer sb;
/** True when close() should close the stream; set by the File constructor. */
private boolean wantClose;

/** Name reported in error messages; "<none>" unless created from a File. */
private String filename;
/** Current line number (starting at 1), reported in error messages. */
private int line;

/**
 * A single lexical token produced by the Tokenizer.
 * The Tokenizer keeps one Token instance and overwrites it on every call to
 * get(), so the type and value read from a returned token are only valid
 * until the next token is fetched.
 */
public static class Token {
	/** The type of token. */
	public int type;

	/** The value of the token, or null for tokens without values. */
	public String value;

	/** Creates an empty token; the type is -1 until set() is first called. */
	private Token() {
		type = -1;
		value = null;
	}

	/**
	 * Overwrites this token in place.
	 * @param type The new token type; must not be negative.
	 * @param value The token text, or null for tokens without values. The
	 * buffer's contents are copied, so it may be reused afterwards.
	 * @return This token.
	 * @throws IllegalArgumentException The type is negative.
	 */
	private Token set(int type, StringBuffer value) {
		if (type < 0) {
			throw new IllegalArgumentException(
					"invalid token type: " + type);
		}
		this.type = type;
		this.value = value == null ? null : value.toString();
		return this;
	}

	/**
	 * Converts the token to a string containing a representation useful
	 * for debugging.
	 */
	@Override
	public String toString() {
		switch (type) {
		case EOF:
			return "<eof>";
		case EOL:
			return "<eol>";
		case WHITESPACE:
			return "<whitespace>";
		case IDENTIFIER:
			return "<identifier: " + value + ">";
		case QUOTED_STRING:
			return "<quoted_string: " + value + ">";
		case COMMENT:
			return "<comment: " + value + ">";
		default:
			return "<unknown>";
		}
	}

	/** Indicates whether this token contains a string. */
	public boolean isString() {
		return (type == IDENTIFIER || type == QUOTED_STRING);
	}

	/** Indicates whether this token contains an EOL or EOF. */
	public boolean isEOL() {
		return (type == EOL || type == EOF);
	}
}

/**
 * A TextParseException whose message is prefixed with the file name and line
 * number supplied by the tokenizer ("filename:line: message").
 */
public static class TokenizerException extends TextParseException {
	/** The bare error message, without the "filename:line: " prefix. */
	final String message;

	/**
	 * @param filename The file name to put in the message prefix.
	 * @param line The line number to put in the message prefix.
	 * @param message The description of the problem.
	 */
	public TokenizerException(String filename, int line, String message) {
		super(filename + ":" + line + ": " + message);
		this.message = message;
	}

	/**
	 * Returns the error description without the file name and line prefix.
	 */
	public String getBaseMessage() {
		return message;
	}
}

/**
 * Builds a Tokenizer that reads from an arbitrary input stream.
 * The stream is wrapped in a BufferedInputStream unless it already is one,
 * and then in a PushbackInputStream that allows two bytes of push-back.
 * Error messages report the file name "<none>" and lines are counted from 1.
 * @param is The InputStream to tokenize.
 */
public Tokenizer(InputStream is) {
	InputStream buffered = (is instanceof BufferedInputStream)
			? is
			: new BufferedInputStream(is);
	this.is = new PushbackInputStream(buffered, 2);
	this.sb = new StringBuffer();
	this.current = new Token();
	this.delimiters = delim;
	this.quoting = false;
	this.multiline = 0;
	this.ungottenToken = false;
	this.line = 1;
	this.filename = "<none>";
}

/**
 * Builds a Tokenizer that reads the contents of a string.
 * NOTE(review): the string is converted with String.getBytes(), i.e. the
 * platform default charset, and the tokenizer widens each byte it reads to
 * a char, so non-ASCII text may not survive unchanged - confirm that callers
 * only pass ASCII (or escaped) input.
 * @param s The String to tokenize.
 */
public Tokenizer(String s) {
	this(new ByteArrayInputStream(s.getBytes()));
}

/**
 * Builds a Tokenizer that reads from a file.
 * The file is opened here and is closed again by close(); its simple name
 * (without directories) is used in error messages.
 * @param f The File to tokenize.
 * @throws FileNotFoundException The file could not be opened for reading.
 */
public Tokenizer(File f) throws FileNotFoundException {
	this(new FileInputStream(f));
	this.filename = f.getName();
	this.wantClose = true;
}

/**
 * Reads the next character from the input, normalizing line endings: a
 * lone CR and a CR LF pair are both returned as a single '\n'. The line
 * counter is incremented for every newline returned.
 * @return The character read, or -1 at end of input.
 * @throws IOException An I/O error occurred.
 */
private int getChar() throws IOException {
	int c = is.read();
	if (c == '\r') {
		int next = is.read();
		// Push the look-ahead back only if it is a real byte. At end of
		// input next is -1, and unread(-1) would store the byte 0xFF,
		// which the following read would return instead of EOF.
		if (next != '\n' && next != -1) {
			is.unread(next);
		}
		c = '\n';
	}
	if (c == '\n') {
		line++;
	}
	return c;
}

/**
 * Pushes a character obtained from getChar() back onto the input.
 * End of input (-1) is ignored; pushing back a newline also takes back the
 * line-counter increment.
 * @param c The character to push back, or -1.
 * @throws IOException The push-back buffer is full.
 */
private void ungetChar(int c) throws IOException {
	if (c != -1) {
		is.unread(c);
		if (c == '\n') {
			line--;
		}
	}
}

/**
 * Consumes spaces and tabs, and also newlines while inside parentheses
 * (multiline > 0). The first character that is not skipped is pushed back.
 * @return The number of characters consumed.
 * @throws IOException An I/O error occurred.
 */
private int skipWhitespace() throws IOException {
	int count = 0;
	for (;;) {
		int ch = getChar();
		boolean blank = (ch == ' ' || ch == '\t');
		boolean continuationNewline = (ch == '\n' && multiline > 0);
		if (!blank && !continuationNewline) {
			ungetChar(ch);
			return count;
		}
		count++;
	}
}

/**
 * Fails if at least one opening parenthesis has not been closed yet.
 * @throws TextParseException There are unclosed parentheses.
 */
private void checkUnbalancedParens() throws TextParseException {
	if (multiline <= 0) {
		return;
	}
	throw exception("unbalanced parentheses");
}

/**
 * Gets the next token from a tokenizer.
 * The returned object is the tokenizer's single reusable Token; its type and
 * value are overwritten by the next call.
 * A backslash keeps the following character from acting as a delimiter; both
 * the backslash and that character are kept in the token value.
 * @param wantWhitespace If true, leading whitespace will be returned as a
 * token.
 * @param wantComment If true, comments are returned as tokens.
 * @return The next token in the stream.
 * @throws TextParseException The input was invalid.
 * @throws IOException An I/O error occurred.
 */
public Token
get(boolean wantWhitespace, boolean wantComment) throws IOException {
	int type;
	int c;

	// A token was pushed back by unget(): hand it out again, unless it is
	// whitespace or a comment the caller does not want, in which case fall
	// through and read the next token from the stream.
	if (ungottenToken) {
		ungottenToken = false;
		if (current.type == WHITESPACE) {
			if (wantWhitespace)
				return current;
		} else if (current.type == COMMENT) {
			if (wantComment)
				return current;
		} else {
			// unget() decremented the line count for an EOL token;
			// count that line again now that it is re-delivered.
			if (current.type == EOL)
				line++;
			return current;
		}
	}
	int skipped = skipWhitespace();
	if (skipped > 0 && wantWhitespace)
		return current.set(WHITESPACE, null);
	type = IDENTIFIER;
	sb.setLength(0);
	while (true) {
		c = getChar();
		if (c == -1 || delimiters.indexOf(c) != -1) {
			// End of input.
			if (c == -1) {
				if (quoting)
					throw exception("EOF in " +
							"quoted string");
				else if (sb.length() == 0)
					return current.set(EOF, null);
				else
					return current.set(type, sb);
			}
			// A delimiter seen before any token text (and outside a
			// quoted string) is significant by itself; otherwise it
			// merely terminates the token collected so far.
			if (sb.length() == 0 && type != QUOTED_STRING) {
				if (c == '(') {
					// Open a group: newlines are whitespace
					// until the matching ')'.
					multiline++;
					skipWhitespace();
					continue;
				} else if (c == ')') {
					if (multiline <= 0)
						throw exception("invalid " +
								"close " +
								"parenthesis");
					multiline--;
					skipWhitespace();
					continue;
				} else if (c == '"') {
					if (!quoting) {
						// Opening quote: from here on only
						// '"' ends the token.
						quoting = true;
						delimiters = quotes;
						type = QUOTED_STRING;
					} else {
						// Closing quote of the string returned
						// by the previous call (it was pushed
						// back when that token ended).
						quoting = false;
						delimiters = delim;
						skipWhitespace();
					}
					continue;
				} else if (c == '\n') {
					return current.set(EOL, null);
				} else if (c == ';') {
					// Comment: collect the text up to, but not
					// including, the end of the line or input.
					while (true) {
						c = getChar();
						if (c == '\n' || c == -1)
							break;
						sb.append((char)c);
					}
					if (wantComment) {
						// Push the line end back so the next
						// call reports the EOL (or EOF).
						ungetChar(c);
						return current.set(COMMENT, sb);
					} else if (c == -1 &&
						   type != QUOTED_STRING)
					{
						checkUnbalancedParens();
						return current.set(EOF, null);
					} else if (multiline > 0) {
						// Inside parentheses the comment and
						// its newline are skipped; discard the
						// comment text and keep reading.
						skipWhitespace();
						sb.setLength(0);
						continue;
					} else
						return current.set(EOL, null);
				} else
					// Space and tab were consumed by
					// skipWhitespace(), so no other delimiter
					// is expected here.
					throw new IllegalStateException();
			} else
				// The delimiter ends the current token; leave it
				// in the stream for the next call.
				ungetChar(c);
			break;
		} else if (c == '\\') {
			// Escape: keep the backslash and append the following
			// character without treating it as a delimiter.
			c = getChar();
			if (c == -1)
				throw exception("unterminated escape sequence");
			sb.append('\\');
		} else if (quoting && c == '\n') {
			throw exception("newline in quoted string");
		}
		sb.append((char)c);
	}
	// NOTE(review): the loop above only breaks after pushing back a
	// delimiter that ended a non-empty or quoted token, so this check
	// looks unreachable - confirm before relying on it.
	if (sb.length() == 0 && type != QUOTED_STRING) {
		checkUnbalancedParens();
		return current.set(EOF, null);
	}
	return current.set(type, sb);
}

/**
 * Gets the next token, skipping over whitespace and comments.
 * @return The next token in the stream.
 * @throws TextParseException The input was invalid.
 * @throws IOException An I/O error occurred.
 */
public Token get() throws IOException {
	final boolean wantWhitespace = false;
	final boolean wantComment = false;
	return get(wantWhitespace, wantComment);
}

/**
 * Pushes the most recently returned token back, so that the next call to
 * get() returns it again. Only one token can be pushed back at a time.
 * @throws IllegalStateException There are already ungotten tokens.
 */
public void unget() {
	if (!ungottenToken) {
		// Taking back an end-of-line also takes back its line count.
		if (current.type == EOL) {
			line--;
		}
		ungottenToken = true;
		return;
	}
	throw new IllegalStateException("Cannot unget multiple tokens");
}

/**
 * Reads the next token and returns its text; the token may be either an
 * identifier or a quoted string.
 * @return The next token in the stream, as a string.
 * @throws TextParseException The input was invalid or not a string.
 * @throws IOException An I/O error occurred.
 */
public String getString() throws IOException {
	Token token = get();
	if (token.isString()) {
		return token.value;
	}
	throw exception("expected a string");
}

/**
 * Reads the next token and returns its text, requiring it to be an
 * identifier (unquoted string).
 * @param expected Description of the wanted value; appended to "expected "
 * in the error message.
 * @return The text of the identifier.
 * @throws TextParseException The next token is not an identifier.
 * @throws IOException An I/O error occurred.
 */
private String _getIdentifier(String expected) throws IOException {
	Token token = get();
	if (token.type == IDENTIFIER) {
		return token.value;
	}
	throw exception("expected " + expected);
}

/**
 * Reads the next token, which must be an unquoted string, and returns its
 * text.
 * @return The next token in the stream, as a string.
 * @throws TextParseException The input was invalid or not an unquoted string.
 * @throws IOException An I/O error occurred.
 */
public String getIdentifier() throws IOException {
	final String identifier = _getIdentifier("an identifier");
	return identifier;
}

/**
 * Reads the next token and parses it as a long. The token must start with
 * a digit, so signed values are rejected.
 * @return The next token in the stream, as a long.
 * @throws TextParseException The input was invalid or not a long.
 * @throws IOException An I/O error occurred.
 */
public long getLong() throws IOException {
	final String text = _getIdentifier("an integer");
	if (Character.isDigit(text.charAt(0))) {
		try {
			return Long.parseLong(text);
		} catch (NumberFormatException e) {
			throw exception("expected an integer");
		}
	}
	throw exception("expected an integer");
}

/**
 * Reads the next token and parses it as an unsigned 32 bit integer
 * (0 to 0xFFFFFFFF).
 * @return The next token in the stream, as an unsigned 32 bit integer.
 * @throws TextParseException The input was invalid or not an unsigned 32
 * bit integer.
 * @throws IOException An I/O error occurred.
 */
public long getUInt32() throws IOException {
	final long value = getLong();
	final boolean inRange = (value >= 0 && value <= 0xFFFFFFFFL);
	if (!inRange) {
		throw exception("expected an 32 bit unsigned integer");
	}
	return value;
}

/**
 * Reads the next token and parses it as an unsigned 16 bit integer
 * (0 to 0xFFFF).
 * @return The next token in the stream, as an unsigned 16 bit integer.
 * @throws TextParseException The input was invalid or not an unsigned 16
 * bit integer.
 * @throws IOException An I/O error occurred.
 */
public int getUInt16() throws IOException {
	final long value = getLong();
	final boolean inRange = (value >= 0 && value <= 0xFFFFL);
	if (!inRange) {
		throw exception("expected an 16 bit unsigned integer");
	}
	return (int) value;
}

/**
 * Reads the next token and parses it as an unsigned 8 bit integer
 * (0 to 0xFF).
 * @return The next token in the stream, as an unsigned 8 bit integer.
 * @throws TextParseException The input was invalid or not an unsigned 8
 * bit integer.
 * @throws IOException An I/O error occurred.
 */
public int getUInt8() throws IOException {
	final long value = getLong();
	final boolean inRange = (value >= 0 && value <= 0xFFL);
	if (!inRange) {
		throw exception("expected an 8 bit unsigned integer");
	}
	return (int) value;
}

/**
 * Reads the next token, which must be an identifier, and parses it with
 * TTL.parseTTL.
 * @return The parsed TTL value.
 * @throws TextParseException The input was not valid.
 * @throws IOException An I/O error occurred.
 * @see TTL
 */
public long getTTL() throws IOException {
	final String text = _getIdentifier("a TTL value");
	long ttl;
	try {
		ttl = TTL.parseTTL(text);
	} catch (NumberFormatException e) {
		throw exception("expected a TTL value");
	}
	return ttl;
}

/**
 * Reads the next token, which must be an identifier, and parses it in TTL
 * syntax by calling TTL.parse with its second argument set to false.
 * @return The parsed value.
 * @throws TextParseException The input was not valid.
 * @throws IOException An I/O error occurred.
 * @see TTL
 */
public long getTTLLike() throws IOException {
	final String text = _getIdentifier("a TTL-like value");
	long parsed;
	try {
		parsed = TTL.parse(text, false);
	} catch (NumberFormatException e) {
		throw exception("expected a TTL-like value");
	}
	return parsed;
}

/**
 * Reads the next token, which must be an identifier, and converts it to a
 * name.
 * @param origin The origin to append to relative names.
 * @return The next token in the stream, as a name.
 * @throws TextParseException The input was invalid or not a valid name.
 * @throws IOException An I/O error occurred.
 * @throws RelativeNameException The parsed name was relative, even with the
 * origin.
 * @see Name
 */
public Name getName(Name origin) throws IOException {
	final String text = _getIdentifier("a name");
	try {
		final Name parsed = Name.fromString(text, origin);
		if (parsed.isAbsolute()) {
			return parsed;
		}
		throw new RelativeNameException(parsed);
	} catch (TextParseException e) {
		// Re-issue the parse error with this tokenizer's file and line.
		throw exception(e.getMessage());
	}
}

/**
 * Reads the next token, which must be an identifier, and converts it to an
 * IP address using Address.getByAddress.
 * @param family The address family.
 * @return The next token in the stream, as an InetAddress
 * @throws TextParseException The input was invalid or not a valid address.
 * @throws IOException An I/O error occurred.
 * @see Address
 */
public InetAddress getAddress(int family) throws IOException {
	final String text = _getIdentifier("an address");
	InetAddress address;
	try {
		address = Address.getByAddress(text, family);
	} catch (UnknownHostException e) {
		// Re-issue the failure with this tokenizer's file and line.
		throw exception(e.getMessage());
	}
	return address;
}

/**
 * Reads the next token and checks that it marks the end of a line or the
 * end of the input.
 * @throws TextParseException The input was invalid or not an EOL or EOF token.
 * @throws IOException An I/O error occurred.
 */
public void getEOL() throws IOException {
	Token token = get();
	switch (token.type) {
	case EOL:
	case EOF:
		return;
	default:
		throw exception("expected EOL or EOF");
	}
}

/**
 * Concatenates the values of all consecutive string tokens (identifiers and
 * quoted strings) that follow. The first token that is not a string is
 * pushed back with unget().
 * @return The concatenated text, or null if the next token is not a string.
 * @throws IOException An I/O error occurred.
 */
private String remainingStrings() throws IOException {
	StringBuilder joined = null;
	for (Token t = get(); t.isString(); t = get()) {
		if (joined == null) {
			joined = new StringBuilder();
		}
		joined.append(t.value);
	}
	unget();
	return (joined == null) ? null : joined.toString();
}

/**
 * Reads all consecutive string tokens that follow, joins them, and decodes
 * the result as base64.
 * @param required If true, an exception will be thrown if no strings remain;
 * otherwise null will be returned.
 * @return The byte array containing the decoded strings, or null if there
 * were no strings to decode.
 * @throws TextParseException The input was invalid.
 * @throws IOException An I/O error occurred.
 */
public byte[] getBase64(boolean required) throws IOException {
	final String encoded = remainingStrings();
	if (encoded != null) {
		final byte[] decoded = base64.fromString(encoded);
		if (decoded == null) {
			throw exception("invalid base64 encoding");
		}
		return decoded;
	}
	if (required) {
		throw exception("expected base64 encoded string");
	}
	return null;
}

/**
 * Reads all consecutive string tokens that follow, joins them, and decodes
 * the result as base64. The data is optional.
 * @return The byte array containing the decoded strings, or null if there
 * were no strings to decode.
 * @throws TextParseException The input was invalid.
 * @throws IOException An I/O error occurred.
 */
public byte[] getBase64() throws IOException {
	final boolean required = false;
	return getBase64(required);
}

/**
 * Reads all consecutive string tokens that follow, joins them, and decodes
 * the result as hex.
 * @param required If true, an exception will be thrown if no strings remain;
 * otherwise null will be returned.
 * @return The byte array containing the decoded strings, or null if there
 * were no strings to decode.
 * @throws TextParseException The input was invalid.
 * @throws IOException An I/O error occurred.
 */
public byte[] getHex(boolean required) throws IOException {
	final String encoded = remainingStrings();
	if (encoded != null) {
		final byte[] decoded = base16.fromString(encoded);
		if (decoded == null) {
			throw exception("invalid hex encoding");
		}
		return decoded;
	}
	if (required) {
		throw exception("expected hex encoded string");
	}
	return null;
}

/**
 * Reads all consecutive string tokens that follow, joins them, and decodes
 * the result as hex. The data is optional.
 * @return The byte array containing the decoded strings, or null if there
 * were no strings to decode.
 * @throws TextParseException The input was invalid.
 * @throws IOException An I/O error occurred.
 */
public byte[] getHex() throws IOException {
	final boolean required = false;
	return getHex(required);
}

/**
 * Reads the next token, which must be an identifier, and decodes it as hex.
 * @return The byte array containing the decoded string.
 * @throws TextParseException The input was invalid.
 * @throws IOException An I/O error occurred.
 */
public byte[] getHexString() throws IOException {
	final String text = _getIdentifier("a hex string");
	final byte[] decoded = base16.fromString(text);
	if (decoded != null) {
		return decoded;
	}
	throw exception("invalid hex encoding");
}

/**
 * Reads the next token, which must be an identifier, and decodes it as
 * base32.
 * @param b32 The base32 context to decode with.
 * @return The byte array containing the decoded string.
 * @throws TextParseException The input was invalid.
 * @throws IOException An I/O error occurred.
 */
public byte[] getBase32String(base32 b32) throws IOException {
	final String text = _getIdentifier("a base32 string");
	final byte[] decoded = b32.fromString(text);
	if (decoded != null) {
		return decoded;
	}
	throw exception("invalid base32 encoding");
}

/**
 * Builds (but does not throw) a parse exception whose message carries the
 * tokenizer's file name and current line number.
 * @param s The error message to include.
 * @return The exception to be thrown
 */
public TextParseException exception(String s) {
	final TextParseException error =
			new TokenizerException(filename, line, s);
	return error;
}

/**
 * Closes the underlying stream if this tokenizer opened it itself, that is
 * if it was created from a File. Streams supplied by the caller are left
 * open. Errors while closing are ignored.
 */
public void close() {
	if (!wantClose) {
		return;
	}
	try {
		is.close();
	} catch (IOException ignored) {
		// Best-effort close; there is nothing useful to do on failure.
	}
}

/**
 * Fallback that closes a file opened by this tokenizer if the caller never
 * called close(). Finalization is deprecated in current Java releases and
 * may run late or not at all, so callers should call close() explicitly.
 */
@Override
protected void finalize() {
	close();
}

}