/*******************************************************************************
* Copyright (c) 2015 QNX Software Systems and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* QNX Software Systems - Initial API and implementation
*******************************************************************************/
package org.eclipse.cdt.qt.core.qmldir;
import java.io.InputStream;
import java.util.Scanner;
import java.util.regex.MatchResult;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.eclipse.cdt.internal.qt.core.location.Position;
import org.eclipse.cdt.internal.qt.core.location.SourceLocation;
import org.eclipse.cdt.qt.core.location.ISourceLocation;
/**
* Converts an <code>InputStream</code> representing a qmldir file into a stream of tokens through successive calls to
* <code>nextToken</code>. This lexer uses regular expressions to match its 16 valid token types:
* <ul>
* <li><b>COMMENT</b>: A single line comment that begins with '#'
* <li><b>MODULE</b>: The keyword 'module'
* <li><b>TYPEINFO</b>: The keyword 'typeinfo'
* <li><b>SINGLETON</b>: The keyword 'singleton'
* <li><b>INTERNAL</b>: The keyword 'internal'
* <li><b>PLUGIN</b>: The keyword 'plugin'
* <li><b>CLASSNAME</b>: The keyword 'classname'
* <li><b>DEPENDS</b>: The keyword 'depends'
* <li><b>DESIGNERSUPPORTED</b>: The keyword 'designersupported'
* <li><b>WORD</b>: A group of characters that form an identifier, filename, or path
* <li><b>DECIMAL</b>: A number of the form [0-9]+ '.' [0-9]+
* <li><b>INTEGER</b>: An integer of the form [0-9]+
* <li><b>WHITESPACE</b>: A group of whitespace characters (not including newlines)
* <li><b>COMMAND_END</b>: A newline character
* <li><b>UNKNOWN</b>: A group of characters that does not match any of the preceding tokens
* <li><b>EOF</b>: End of File
* </ul>
*/
public class QMLDirectoryLexer {
    /**
     * A single matched token returned by a <code>QMLDirectoryLexer</code>. A <code>Token</code> stores information on how it was
     * matched including the type of token, the text that was matched, and its position in the lexer's input.
     */
    public static class Token {
        private final TokenType tokType;
        private final String raw;
        private final ISourceLocation location;
        private final int start;
        private final int end;

        /**
         * Creates a token from a regular expression match.
         *
         * @param type
         *            the type of token that was matched
         * @param match
         *            the match supplying the token's text and offsets
         * @param line
         *            the line number the token appears on
         * @param lineStart
         *            the offset at which that line begins; subtracted from the token offsets to obtain columns
         */
        private Token(TokenType type, MatchResult match, int line, int lineStart) {
            this(type, match.group(), match.start(), match.end(), line, lineStart);
        }

        /**
         * Creates a token from explicit text and offsets.
         *
         * @param type
         *            the type of token that was matched
         * @param raw
         *            the matched text
         * @param start
         *            the offset of the first character of the token
         * @param end
         *            the offset just past the last character of the token
         * @param line
         *            the line number the token appears on
         * @param lineStart
         *            the offset at which that line begins; subtracted from the token offsets to obtain columns
         */
        private Token(TokenType type, String raw, int start, int end, int line, int lineStart) {
            this.tokType = type;
            // Line terminators are stored as their two-character escape sequences (a backslash followed by 'n' or 'r').
            String escaped = raw.replace("\n", "\\n"); //$NON-NLS-1$ //$NON-NLS-2$
            escaped = escaped.replace("\r", "\\r"); //$NON-NLS-1$ //$NON-NLS-2$
            this.raw = escaped;
            this.start = start;
            this.end = end;
            this.location = new SourceLocation(null,
                    new Position(line, start - lineStart),
                    new Position(line, end - lineStart));
        }

        /**
         * Get the type of token that was matched.
         *
         * @return the type of token
         */
        public TokenType getType() {
            return tokType;
        }

        /**
         * Gets the text that this token was matched with. Newline and carriage return characters in the matched text are
         * returned as the escape sequences <code>\n</code> and <code>\r</code>.
         *
         * @return a String representing the matched text
         */
        public String getText() {
            return raw;
        }

        /**
         * Gets a more detailed description of this token's location in the input than {@link Token#getStart()} and
         * {@link Token#getEnd()}. This method allows the retrieval of line and column information in order to make output for
         * syntax errors and the like more human-readable.
         *
         * @return the {@link ISourceLocation} representing this token's location in the input
         */
        public ISourceLocation getLocation() {
            return location;
        }

        /**
         * Gets the zero-indexed character offset indicating the start of this token in the input.
         *
         * @return the token's start offset
         */
        public int getStart() {
            return start;
        }

        /**
         * Gets the zero-indexed character offset indicating the end of this token in the input.
         *
         * @return the token's end offset
         */
        public int getEnd() {
            return end;
        }
    }

    /**
     * An Enumeration encompassing the 16 possible types of tokens returned by a <code>QMLDirectoryLexer</code>. The declaration
     * order matters: the types are combined into a single alternation in this order, so earlier types take precedence over
     * later ones when several could match at the same position.
     *
     * @see org.eclipse.cdt.qt.core.qmldir.QMLDirectoryLexer
     */
    public static enum TokenType {
        COMMENT("#.*$"), //$NON-NLS-1$
        MODULE("module(?=\\s|$)"), //$NON-NLS-1$
        TYPEINFO("typeinfo(?=\\s|$)"), //$NON-NLS-1$
        SINGLETON("singleton(?=\\s|$)"), //$NON-NLS-1$
        INTERNAL("internal(?=\\s|$)"), //$NON-NLS-1$
        PLUGIN("plugin(?=\\s|$)"), //$NON-NLS-1$
        CLASSNAME("classname(?=\\s|$)"), //$NON-NLS-1$
        DEPENDS("depends(?=\\s|$)"), //$NON-NLS-1$
        DESIGNERSUPPORTED("designersupported(?=\\s|$)"), //$NON-NLS-1$
        WORD("[^0-9\\s][^\\s]*"), //$NON-NLS-1$
        DECIMAL("[0-9]+\\.[0-9]+"), //$NON-NLS-1$
        INTEGER("[0-9]+"), //$NON-NLS-1$
        WHITESPACE("\\h+"), //$NON-NLS-1$
        COMMAND_END("(?:\r\n)|\n"), //$NON-NLS-1$
        UNKNOWN(".+"), //$NON-NLS-1$
        EOF(null);

        /**
         * The combined pattern for every token type that has a regular expression. It is compiled once, when the enum is
         * initialized, so it can be shared without any lazy-initialization race.
         */
        private static final Pattern ALL_TERMINALS = buildPatternForAllTerminals();

        /**
         * Builds one alternation in which every token type with a regular expression occupies its own capturing group.
         * Groups are numbered from 1 in declaration order, skipping types without a regular expression.
         *
         * @return the compiled multi-line pattern
         */
        private static Pattern buildPatternForAllTerminals() {
            StringBuilder alternation = new StringBuilder();
            for (TokenType tok : values()) {
                if (tok.regex == null) {
                    continue;
                }
                if (alternation.length() > 0) {
                    alternation.append('|');
                }
                alternation.append('(').append(tok.regex).append(')');
            }
            return Pattern.compile(alternation.toString(), Pattern.MULTILINE);
        }

        /**
         * Gets the pattern that matches any one terminal of the qmldir grammar.
         *
         * @return the shared, pre-compiled pattern
         */
        private static Pattern patternForAllTerminals() {
            return ALL_TERMINALS;
        }

        private final String regex;

        private TokenType(String regex) {
            this.regex = regex;
        }
    }

    /** Matcher over the complete input text; <code>null</code> until <code>setInput</code> has been called. */
    private Matcher matcher;
    /** Set once the matcher has reported that no further token can be found. */
    private boolean exhausted;
    /** The most recent successful match, or <code>null</code> if nothing has been matched yet. */
    private MatchResult lastMatch;
    /** The line number of the next token to be returned, starting at 1. */
    private int currentLine;
    /** The offset just past the most recently matched newline token, or 0 if none has been matched. */
    private int currentLineStart;

    /**
     * Creates a new <code>QMLDirectoryLexer</code> without initializing any of its internal state. A call to
     * <code>setInput</code> is necessary to fully initialize the lexer before any calls to <code>nextToken</code>.
     */
    public QMLDirectoryLexer() {
    }

    /**
     * Prepares for lexical analysis by giving the lexer an <code>InputStream</code> to retrieve text from. The stream is read
     * to its end by this call, decoded with the platform's default charset, and is not closed.
     * <p>
     * The text is read up front so that token offsets are offsets into the whole input. Matching directly through a
     * <code>Scanner</code> instead yields offsets into the Scanner's internal buffer, which stop being input offsets once that
     * buffer is compacted on longer inputs.
     *
     * @param input
     *            the input to perform lexical analysis on
     */
    @SuppressWarnings("resource")
    public void setInput(InputStream input) {
        // The Scanner is deliberately not closed: closing it would also close the caller's stream.
        Scanner reader = new Scanner(input).useDelimiter("\\A"); //$NON-NLS-1$
        String text = reader.hasNext() ? reader.next() : ""; //$NON-NLS-1$
        this.matcher = TokenType.patternForAllTerminals().matcher(text);
        this.exhausted = false;
        this.lastMatch = null;
        this.currentLine = 1;
        this.currentLineStart = 0;
    }

    /**
     * Retrieves the next valid token from the input given by <code>setInput</code>. This is a helper method to skip whitespace
     * that is equivalent to <code>QMLDirectoryLexer.nextToken(true)</code>.
     *
     * @return the next token in the input
     * @throws IllegalArgumentException
     *             if <code>setInput</code> has not been called
     */
    public Token nextToken() throws IllegalArgumentException {
        return nextToken(true);
    }

    /**
     * Retrieves the next valid token from the input given by <code>setInput</code>. This method has the ability to skip over
     * whitespace tokens by setting <code>skipWhitespace</code> to <code>true</code>. Once the input is exhausted every call
     * returns an <code>EOF</code> token positioned at the end of the last match.
     *
     * @param skipWhitespace
     *            whether or not the lexer should skip whitespace tokens
     * @return the next token in the input
     * @throws IllegalArgumentException
     *             if <code>setInput</code> has not been called
     */
    public Token nextToken(boolean skipWhitespace) throws IllegalArgumentException {
        if (matcher == null) {
            throw new IllegalArgumentException("Input cannot be null"); //$NON-NLS-1$
        }
        while (!exhausted) {
            if (!matcher.find()) {
                // Remember the failure so that later calls do not search the input again
                exhausted = true;
                break;
            }
            MatchResult match = matcher.toMatchResult();
            lastMatch = match;
            TokenType type = typeOf(match);
            if (skipWhitespace && type == TokenType.WHITESPACE) {
                continue;
            }
            // The token is created before the line bookkeeping is advanced, so a newline token is reported on the line it
            // terminates.
            Token next = new Token(type, match, currentLine, currentLineStart);
            if (type == TokenType.COMMAND_END) {
                currentLine++;
                currentLineStart = match.end();
            }
            return next;
        }
        int offset = lastMatch == null ? 0 : lastMatch.end();
        return new Token(TokenType.EOF, "", offset, offset, currentLine, currentLineStart); //$NON-NLS-1$
    }

    /**
     * Determines which token type produced the given match by finding the capturing group that participated in it.
     *
     * @param match
     *            a match of the combined terminal pattern
     * @return the matching token type, or <code>UNKNOWN</code> if no group participated
     */
    private static TokenType typeOf(MatchResult match) {
        int groupNo = 1;
        for (TokenType candidate : TokenType.values()) {
            if (candidate.regex == null) {
                continue;
            }
            if (match.start(groupNo) != -1) {
                return candidate;
            }
            groupNo++;
        }
        return TokenType.UNKNOWN;
    }
}