/***************************************************************************** * Copyright (C) Codehaus.org * * ------------------------------------------------------------------------- * * Licensed under the Apache License, Version 2.0 (the "License"); * * you may not use this file except in compliance with the License. * * You may obtain a copy of the License at * * * * http://www.apache.org/licenses/LICENSE-2.0 * * * * Unless required by applicable law or agreed to in writing, software * * distributed under the License is distributed on an "AS IS" BASIS, * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * * See the License for the specific language governing permissions and * * limitations under the License. * *****************************************************************************/ package net.ion.rosetta; import net.ion.rosetta.pattern.CharPredicate; import net.ion.rosetta.pattern.CharPredicates; import net.ion.rosetta.pattern.Pattern; import net.ion.rosetta.pattern.Patterns; /** * Provides common {@link Parser} implementations that scan the source and match certain string patterns. * <p> * Some scanners like {@link #IDENTIFIER} and {@link #INTEGER} return the matched string, while others like {@link #WHITESPACES} return nothing, as indicated by the {@link Void} type parameter. In case the matched string is still needed nontheless, use the {@link Parser#source()} method. * * @author Ben Yu */ public final class Scanners { /** A scanner that scans greedily for 1 or more whitespace characters. */ public static final Parser<Void> WHITESPACES = pattern(Patterns.many1(CharPredicates.IS_WHITESPACE), "whitespaces"); /** * Matches any character in the input. Different from {@link Parsers#always()}, it fails on EOF. Also it consumes the current character in the input. */ public static final Parser<Void> ANY_CHAR = new AnyCharScanner("any character"); /** Scanner for c++/java style line comment. */ public static final Parser<Void> JAVA_LINE_COMMENT = lineComment("//"); /** Scanner for SQL style line comment. */ public static final Parser<Void> SQL_LINE_COMMENT = lineComment("--"); /** Scanner for haskell style line comment. ({@code --}) */ public static final Parser<Void> HASKELL_LINE_COMMENT = lineComment("--"); private static final Parser<Void> JAVA_BLOCK_COMMENTED = pattern(notChar2('*', '/').many(), "commented block"); /** Scanner for c++/java style block comment. */ public static final Parser<Void> JAVA_BLOCK_COMMENT = Parsers.sequence(string("/*"), JAVA_BLOCK_COMMENTED, string("*/")); /** Scanner for SQL style block comment. */ public static final Parser<Void> SQL_BLOCK_COMMENT = Parsers.sequence(string("/*"), JAVA_BLOCK_COMMENTED, string("*/")); /** Scanner for haskell style block comment. {- -} */ public static final Parser<Void> HASKELL_BLOCK_COMMENT = Parsers.sequence(string("{-"), pattern(notChar2('-', '}').many(), "commented block"), string("-}")); /** * Scanner with a pattern for SQL style string literal. A SQL string literal is a string quoted by single quote, a single quote character is escaped by 2 single quotes. */ public static final Parser<String> SINGLE_QUOTE_STRING = quotedBy(pattern(Patterns.regex("(('')|[^'])*"), "quoted string"), isChar('\'')).source(); /** * Scanner with a pattern for double quoted string literal. Backslash '\' is used as escape character. */ public static final Parser<String> DOUBLE_QUOTE_STRING = quotedBy(pattern(Patterns.regex("((\\\\.)|[^\"\\\\])*"), "quoted string"), Scanners.isChar('"')).source(); /** Scanner for a c/c++/java style character literal. such as 'a' or '\\'. */ public static final Parser<String> SINGLE_QUOTE_CHAR = quotedBy(pattern(Patterns.regex("(\\\\.)|[^'\\\\]"), "quoted char"), Scanners.isChar('\'')).source(); /** * Scanner for the c++/java style delimiter of tokens. For example, whitespaces, line comment and block comment. */ public static final Parser<Void> JAVA_DELIMITER = Parsers.plus(WHITESPACES, JAVA_LINE_COMMENT, JAVA_BLOCK_COMMENT).skipMany(); /** * Scanner for the haskell style delimiter of tokens. For example, whitespaces, line comment and block comment. */ public static final Parser<Void> HASKELL_DELIMITER = Parsers.plus(WHITESPACES, HASKELL_LINE_COMMENT, HASKELL_BLOCK_COMMENT).skipMany(); /** * Scanner for the SQL style delimiter of tokens. For example, whitespaces and line comment. */ public static final Parser<Void> SQL_DELIMITER = Parsers.plus(WHITESPACES, SQL_LINE_COMMENT, SQL_BLOCK_COMMENT).skipMany(); /** * Scanner for a regular identifier, that starts with either an underscore or an alpha character, followed by 0 or more alphanumeric characters. */ public static final Parser<String> IDENTIFIER = pattern(Patterns.WORD, "word").source(); public static final Parser<String> ARRAY_IDENTIFIER = pattern(Patterns.ARRAYWORD, "array word").source(); /** Scanner for an integer. */ public static final Parser<String> INTEGER = pattern(Patterns.INTEGER, "integer").source(); /** Scanner for a decimal number. */ public static final Parser<String> DECIMAL = pattern(Patterns.DECIMAL, "decimal").source(); /** Scanner for a decimal number. 0 is not allowed as the leading digit. */ public static final Parser<String> DEC_INTEGER = pattern(Patterns.DEC_INTEGER, "decimal integer").source(); /** Scanner for a octal number. 0 is the leading digit. */ public static final Parser<String> OCT_INTEGER = pattern(Patterns.OCT_INTEGER, "octal integer").source(); /** * Scanner for a hexadecimal number. Has to start with {@code 0x} or {@code 0X}. */ public static final Parser<String> HEX_INTEGER = pattern(Patterns.HEX_INTEGER, "hexadecimal integer").source(); /** Scanner for a scientific notation. */ public static final Parser<String> SCIENTIFIC_NOTATION = pattern(Patterns.SCIENTIFIC_NOTATION, "scientific notation").source(); /** * A scanner that scans greedily for 0 or more characters that satisfies the given CharPredicate. * * @param predicate * the predicate object. * @return the Parser object. */ public static Parser<Void> many(CharPredicate predicate) { return pattern(Patterns.isChar(predicate).many(), predicate + "*"); } /** * A scanner that scans greedily for 1 or more characters that satisfies the given CharPredicate. * * @param predicate * the predicate object. * @return the Parser object. */ public static Parser<Void> many1(CharPredicate predicate) { return pattern(Patterns.many1(predicate), predicate + "+"); } /** * A scanner that scans greedily for 0 or more occurrences of the given pattern. * * @param pattern * the pattern object. * @param name * the name of what's expected logically. Is used in error message. * @return the Parser object. */ public static Parser<Void> many(Pattern pattern, String name) { return pattern(pattern.many(), name); } /** * A scanner that scans greedily for 1 or more occurrences of the given pattern. * * @param pattern * the pattern object. * @param name * the name of what's expected logically. Is used in error message. * @return the Parser object. */ public static Parser<Void> many1(Pattern pattern, String name) { return pattern(pattern.many1(), name); } /** * Matches the input against the specified string. * * @param str * the string to match * @return the scanner. */ public static Parser<Void> string(String str) { return string(str, str); } /** * Matches the input against the specified string. * * @param str * the string to match * @param name * the name of what's expected logically. Is used in error message. * @return the scanner. */ public static Parser<Void> string(String str, String name) { return pattern(Patterns.string(str), name); } /** * A scanner that scans the input for an occurrence of a string pattern. * * @param pattern * the pattern object. * @param name * the name of what's expected logically. Is used in error message. * @return the Parser object. */ public static Parser<Void> pattern(Pattern pattern, String name) { return new PatternScanner(name, pattern); } /** * A scanner that matches the input against the specified string case insensitively. * * @param str * the string to match * @param name * the name of what's expected logically. Is used in error message. * @return the scanner. */ public static Parser<Void> stringCaseInsensitive(String str, String name) { return pattern(Patterns.stringCaseInsensitive(str), name); } /** * A scanner that matches the input against the specified string case insensitively. * * @param str * the string to match * @return the scanner. */ public static Parser<Void> stringCaseInsensitive(String str) { return stringCaseInsensitive(str, str); } /** * A scanner that succeeds and consumes the current character if it satisfies the given {@link CharPredicate}. * * @param predicate * the predicate. * @return the scanner. */ public static Parser<Void> isChar(CharPredicate predicate) { return isChar(predicate, predicate.toString()); } /** * A scanner that succeeds and consumes the current character if it satisfies the given {@link CharPredicate}. * * @param predicate * the predicate. * @param name * the name of what's expected logically. Is used in error message. * @return the scanner. */ public static Parser<Void> isChar(CharPredicate predicate, String name) { return new IsCharScanner(name, predicate); } /** * A scanner that succeeds and consumes the current character if it is equal to {@code ch}. * * @param ch * the expected character. * @param name * the name of what's expected logically. Is used in error message. * @return the scanner. */ public static Parser<Void> isChar(char ch, String name) { return isChar(CharPredicates.isChar(ch), name); } /** * A scanner that succeeds and consumes the current character if it is equal to {@code ch}. * * @param ch * the expected character. * @return the scanner. */ public static Parser<Void> isChar(char ch) { return isChar(ch, Character.toString(ch)); } /** * A scanner that succeeds and consumes the current character if it is equal to {@code ch}. * * @param ch * the expected character. * @param name * the name of what's expected logically. Is used in error mesage. * @return the scanner. */ public static Parser<Void> notChar(char ch, String name) { return isChar(CharPredicates.notChar(ch), name); } /** * A scanner that succeeds and consumes the current character if it is not equal to {@code ch}. * * @param ch * the expected character. * @return the scanner. */ public static Parser<Void> notChar(char ch) { return notChar(ch, "^" + ch); } /** * A scanner that succeeds and consumes the current character if it equals to any character in {@code chars}. * * @param chars * the characters. * @param name * the name of what's expected logically. Is used in error message. * @return the scanner. */ public static Parser<Void> among(String chars, String name) { return isChar(CharPredicates.among(chars), name); } /** * A scanner that succeeds and consumes the current character if it equals to any character in {@code chars}. */ public static Parser<Void> among(String chars) { if (chars.length() == 0) return isChar(CharPredicates.NEVER); if (chars.length() == 1) return isChar(chars.charAt(0)); return isChar(CharPredicates.among(chars)); } /** * A scanner that succeeds and consumes the current character if it is not equal to any character in {@code chars}. * * @param chars * the characters. * @param name * the name of what's expected logically. Is used in error message. * @return the scanner. */ public static Parser<Void> notAmong(String chars, String name) { return isChar(CharPredicates.notAmong(chars), name); } /** * A scanner that succeeds and consumes the current character if it is not equal to any character in {@code chars}. */ public static Parser<Void> notAmong(String chars) { if (chars.length() == 0) return ANY_CHAR; if (chars.length() == 1) return notChar(chars.charAt(0)); return isChar(CharPredicates.notAmong(chars)); } /** * A scanner that succeeds and consumes all the characters until the {@code '\n'} character if the current input starts with the string literal {@code begin}. The {@code '\n'} character isn't consumed. */ public static Parser<Void> lineComment(String begin) { return pattern(Patterns.lineComment(begin), begin); } /** * A scanner for non-nested block comment that starts with {@code begin} and ends with {@code end}. */ public static Parser<Void> blockComment(String begin, String end) { Pattern opening = Patterns.string(begin).next(Patterns.notString(end).many()); return pattern(opening, begin).next(string(end)); } /** * A scanner for a non-nestable block comment that starts with {@code begin} and ends with {@code end}. * * @param begin * begins a block comment * @param end * ends a block comment * @param commented * the commented pattern. * @return the Scanner for the block comment. */ public static Parser<Void> blockComment(String begin, String end, Pattern commented) { Pattern opening = Patterns.string(begin).next(Patterns.string(end).not().next(commented).many()); return pattern(opening, begin).next(string(end)); } /** * A scanner for a non-nestable block comment that starts with {@code begin} and ends with {@code end}. * * @param begin * begins a block comment * @param end * ends a block comment * @param commented * the commented pattern. * @return the Scanner for the block comment. */ public static Parser<Void> blockComment(Parser<Void> begin, Parser<Void> end, Parser<?> commented) { return Parsers.sequence(begin, end.not().next(commented).skipMany(), end); } /** * A scanner for a nestable block comment that starts with {@code begin} and ends with {@code end}. * * @param begin * begins a block comment * @param end * ends a block comment * @return the block comment scanner. */ public static Parser<Void> nestableBlockComment(String begin, String end) { return nestableBlockComment(begin, end, Patterns.isChar(CharPredicates.ALWAYS)); } /** * A scanner for a nestable block comment that starts with {@code begin} and ends with {@code end}. * * @param begin * begins a block comment * @param end * ends a block comment * @param commented * the commented pattern except for nested comments. * @return the block comment scanner. */ public static Parser<Void> nestableBlockComment(String begin, String end, Pattern commented) { return nestableBlockComment(string(begin), string(end), pattern(commented, "commented")); } /** * A scanner for a nestable block comment that starts with {@code begin} and ends with {@code end}. * * @param begin * starts a block comment * @param end * ends a block comment * @param commented * the commented pattern except for nested comments. * @return the block comment scanner. */ public static Parser<Void> nestableBlockComment(Parser<?> begin, Parser<?> end, Parser<?> commented) { return new NestableBlockCommentScanner(begin, end, commented); } /** * A scanner for a quoted string that starts with character {@code begin} and ends with character {@code end}. */ public static Parser<String> quoted(char begin, char end) { Pattern beforeClosingQuote = Patterns.isChar(begin).next(Patterns.many(CharPredicates.notChar(end))); return pattern(beforeClosingQuote, Character.toString(begin)).next(isChar(end)).source(); } /** * A scanner for a quoted string that starts with {@code begin} and ends with {@code end}. * * @param begin * begins a quote * @param end * ends a quote * @param quoted * the parser that recognizes the quoted pattern. * @return the scanner. */ public static Parser<String> quoted(Parser<Void> begin, Parser<Void> end, Parser<?> quoted) { return Parsers.sequence(begin, quoted.skipMany(), end).source(); } /** * A scanner that after character level {@code outer} succeeds, subsequently feeds the recognized characters to {@code inner} for a nested scanning. * * <p> * Is useful for scenaios like parsing string interpolation grammar. */ public static Parser<Void> nestedScanner(Parser<?> outer, Parser<Void> inner) { return new NestedScanner(outer, inner); } /** * Matches a character if the input has at least 1 character, or if the input has at least 2 characters with the first 2 characters not being {@code c1} and {@code c2}. * * @return the Pattern object. */ private static Pattern notChar2(final char c1, final char c2) { return new Pattern() { @Override public int match(CharSequence src, int begin, int end) { if (begin == end - 1) return 1; if (begin >= end) return MISMATCH; if (src.charAt(begin) == c1 && src.charAt(begin + 1) == c2) return Pattern.MISMATCH; return 1; } }; } private static Parser<Void> quotedBy(Parser<Void> parser, Parser<?> quote) { return parser.between(quote, quote); } private Scanners() { } }