/**
* Copyright 2002-2017 Evgeny Gryaznov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.textmapper.lapg.regex;
import org.junit.Test;
import org.textmapper.lapg.api.regex.*;
import org.textmapper.lapg.regex.RegexDefLexer.Span;
import org.textmapper.lapg.regex.RegexDefLexer.Tokens;
import org.textmapper.lapg.regex.RegexDefTree.TextSource;
import java.io.IOException;
import static org.junit.Assert.*;
/**
 * Tests for the regular expression lexer ({@link RegexDefLexer}) and
 * parser ({@link RegexDefTree}).
 *
 * Most tests parse a regular expression and compare the textual form of the
 * resulting tree (its {@code toString()}) with an expected string.
 *
 * Gryaznov Evgeny, 4/5/11
 */
public class RegexDefTest {

    /** Sets, expansions and nested groups must be printed back unchanged. */
    @Test
    public void testParens() {
        checkRegex("[a-z]");
        checkRegex("[{a}(aa)]");
        checkRegex("{a}{2}");
        checkRegex("(A|)");
        checkRegex("[^A-Z]");
        checkRegex("([^A-Z]+)A");
        checkRegex("([^A-Z]+|B)A");
        checkRegex("(([^A-Z])+|B)A");
        checkRegex("((([^A-Z])+)|B)A");
        checkRegex("(((([^A-Z])+)|B)A)");
    }

    /** Single-letter escapes must be printed back unchanged. */
    @Test
    public void testSpecialChars() {
        checkRegex("\\a");
        checkRegex("\\b");
        checkRegex("\\f");
        checkRegex("\\n");
        checkRegex("\\r");
        checkRegex("\\t");
        checkRegex("\\v");
    }

    /**
     * Numeric escapes: three-digit codes are printed back in a normalized
     * form, while truncated or malformed hexadecimal escapes are reported
     * as errors.
     */
    @Test
    public void testCharClasses() {
        checkRegex("");
        checkRegex("\\001", "\\x01");
        checkRegex("\\011", "\\t");
        checkRegex("\\022", "\\x12");
        checkRegex("\\111", "I");
        checkRegex("\\xf40");
        checkErrors("\\u200", "Unexpected end of input reached");
        checkErrors("\\x2x0", "invalid token at line 1: `\\x2x`, skipped");
        checkErrors("\\x2x", "invalid token at line 1: `\\x2x`, skipped");
        checkErrors("\\u200xx", "invalid token at line 1: `\\u200x`, skipped");
        checkRegex("\\uf40b");
    }

    /** A long, deeply nested real-world expression must survive a parse/print round trip. */
    @Test
    public void testIPv6() {
        checkRegex("\\s*((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1," +
                "4}|((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3})|:))|(" +
                "([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\." +
                "(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1," +
                "3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\." +
                "(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1," +
                "4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\." +
                "(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1," +
                "5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\." +
                "(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1," +
                "6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\." +
                "(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1," +
                "4}){0," +
                "5}:((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3}))|:)))(%.+)" +
                "?\\s*");
    }

    /**
     * Dashes inside sets: a leading or trailing dash is printed back escaped,
     * and ranges with an invalid bound are reported as errors.
     */
    @Test
    public void testSet() {
        checkRegex("[a-z-]", "[a-z\\-]");
        checkRegex("[-a-z]", "[\\-a-z]");
        checkRegex("[a\\-{]", "[a\\-{]");
        checkErrors("[a-{]", "invalid range in character class (after dash): `\\{', escape `-'");
        checkErrors("[\\.-z]", "invalid range in character class (before dash): `\\.', escape `-'");
    }

    /** A set built from character classes must contain the expected characters. */
    @Test
    public void testUnicodeSet() {
        RegexPart r = checkRegex("[\\w\\p{Ll}]");
        assertTrue(r instanceof RegexSet);
        CharacterSet set = ((RegexSet) r).getSet();
        assertTrue(set.contains('a'));
        assertTrue(set.contains('_'));
        // Cyrillic small letter je is in the set ...
        assertTrue(set.contains('\u0458'));
        // ... while its capital counterpart is not.
        assertFalse(set.contains('\u0408'));
    }

    /** Brace constructs: a space before the closing brace makes the token invalid. */
    @Test
    public void testQuantifiers() {
        checkRegex("{aaa}");
        checkErrors("{aaa }", "invalid token at line 1: `{aaa `, skipped");
        checkErrors("a{aaa }", "invalid token at line 1: `{aaa `, skipped");
        checkRegex("a{9}");
        checkRegex("a{9,}");
        checkRegex("a{9,10}");
    }

    /** The lexer must split the input into the expected token sequence. */
    @Test
    public void testLexer() throws IOException {
        checkLexer("abc", Tokens._char, Tokens._char, Tokens._char);
        checkLexer("\\w++", Tokens.charclass, Tokens.Plus, Tokens._char);
        checkLexer("(\\011{1,3}{name})",
                Tokens.Lparen, Tokens.escaped, Tokens.quantifier, Tokens.expand, Tokens.Rparen);
        checkLexer("[^()a-z]", Tokens.LbrackXor, Tokens._char, Tokens._char, Tokens._char, Tokens.Minus, Tokens._char,
                Tokens.Rbrack);
        checkLexer("a{+}\\p{abc}{-}\\x12{eoi}", Tokens._char, Tokens.op_union, Tokens.charclass, Tokens.op_minus,
                Tokens.escaped, Tokens.kw_eoi);
    }

    /** Plain character sequences are constant; quantified or alternative parts are not. */
    @Test
    public void testConstants() {
        checkConstantRegex("abc", null, "abc");
        checkConstantRegex("(a(b)c)", null, "abc");
        checkConstantRegex("ab(c)", null, "abc");
        checkConstantRegex("(abc)", null, "abc");
        checkConstantRegex("abc()", null, "abc");
        checkConstantRegex("\\t", null, "\t");
        checkConstantRegex("\\u0009", "\\t", "\t");
        assertFalse(checkRegex("a{9,10}").isConstant());
        assertFalse(checkRegex("aa(b|)").isConstant());
        assertFalse(checkRegex("aab?").isConstant());
        assertFalse(checkRegex("aab*").isConstant());
    }

    /** Walks a tree containing lists, alternatives, quantifiers, sets and expansions. */
    @Test
    public void testSwitch1() {
        checkParserViaSwitch("(a|[a-z]+){name}+a{9,10}\\\\.",
                "list () [\n" +
                        "\tlist (in paren) [\n" +
                        "\t\tor [\n" +
                        "\t\t\ta\n" +
                        "\t\t\tquantifier {1,-1} [\n" +
                        "\t\t\t\t[a-z]\n" +
                        "\t\t\t]\n" +
                        "\t\t]\n" +
                        "\t]\n" +
                        "\tquantifier {1,-1} [\n" +
                        "\t\t{name}\n" +
                        "\t]\n" +
                        "\tquantifier {9,10} [\n" +
                        "\t\ta\n" +
                        "\t]\n" +
                        "\t\\\\\n" +
                        "\t.\n" +
                        "]\n");
    }

    /** Walks a tree with an empty alternative; the second plus is a plain character. */
    @Test
    public void testSwitch2() {
        checkParserViaSwitch("(a|)++",
                "list () [\n" +
                        "\tquantifier {1,-1} [\n" +
                        "\t\tlist (in paren) [\n" +
                        "\t\t\tor [\n" +
                        "\t\t\t\ta\n" +
                        "\t\t\t\t<empty>\n" +
                        "\t\t\t]\n" +
                        "\t\t]\n" +
                        "\t]\n" +
                        "\t\\+\n" +
                        "]\n");
    }

    /**
     * Parses {@code regex} and checks that it is a constant with the given value.
     *
     * @param regex     the regular expression to parse
     * @param converted the expected printed form, or {@code null} when it equals {@code regex}
     * @param value     the expected constant value
     */
    private void checkConstantRegex(String regex, String converted, String value) {
        RegexPart parsed = checkRegex(regex, converted == null ? regex : converted);
        assertTrue(parsed.isConstant());
        assertEquals(value, parsed.getConstantValue());
    }

    /**
     * Parses {@code regex} and checks that it is printed back unchanged.
     *
     * @return the root of the parsed tree
     */
    private RegexPart checkRegex(String regex) {
        return checkRegex(regex, regex);
    }

    /**
     * Runs the lexer over {@code regex} and checks that it produces exactly
     * the given tokens followed by {@code Tokens.eoi}. Any problem reported
     * by the lexer fails the test.
     *
     * @param regex  the input text
     * @param tokens the expected token symbols, in order
     */
    private void checkLexer(String regex, int... tokens) throws IOException {
        RegexDefLexer lexer = new RegexDefLexer(regex, (message, offset, endoffset) -> fail(message));
        for (int i = 0; i < tokens.length; i++) {
            Span next = lexer.next();
            assertEquals("token #" + i + " of `" + regex + "`", tokens[i], next.symbol);
        }
        Span last = lexer.next();
        assertEquals("end of input expected after `" + regex + "`", Tokens.eoi, last.symbol);
    }

    /**
     * Parses {@code regex} without errors and checks its printed form.
     *
     * @param regex    the regular expression to parse
     * @param expected the expected {@code toString()} of the parsed tree
     * @return the root of the parsed tree
     */
    private RegexPart checkRegex(String regex, String expected) {
        RegexPart root = parseWithoutErrors(regex);
        assertEquals(expected, root.toString());
        return root;
    }

    /**
     * Parses {@code regex} and checks that the reported error messages are
     * exactly {@code expectedErrors}, in the same order.
     *
     * @param regex          the regular expression to parse
     * @param expectedErrors the expected error messages
     */
    private void checkErrors(String regex, String... expectedErrors) {
        RegexDefTree<RegexAstPart> result = RegexDefTree.parse(new TextSource("input", regex, 1));
        assertTrue("no errors :(", result.hasErrors());
        int actualCount = result.getErrors().size();
        // Walk to the longer of the two lists so that both missing and
        // unexpected errors show up as a mismatch against null.
        for (int i = 0; i < Math.max(expectedErrors.length, actualCount); i++) {
            String expected = i < expectedErrors.length ? expectedErrors[i] : null;
            String actual = i < actualCount ? result.getErrors().get(i).getMessage() : null;
            assertEquals("error #" + i + " for `" + regex + "`", expected, actual);
        }
    }

    /**
     * Parses {@code regex}, renders the tree through a {@link RegexSwitch}
     * as an indented dump and compares the dump with {@code expected}.
     *
     * @param regex    the regular expression to parse
     * @param expected the expected dump, one node per line, children indented by a tab
     */
    private void checkParserViaSwitch(String regex, String expected) {
        RegexPart root = parseWithoutErrors(regex);
        String actual = root.accept(new RegexSwitch<String>() {
            @Override
            public String caseAny(RegexAny c) {
                return c.toString();
            }

            @Override
            public String caseChar(RegexChar c) {
                return c.toString();
            }

            @Override
            public String caseExpand(RegexExpand c) {
                return c.toString();
            }

            @Override
            public String caseList(RegexList c) {
                StringBuilder sb = new StringBuilder();
                sb.append("list (").append(c.isParenthesized() ? "in paren" : "").append(") [\n");
                for (RegexPart element : c.getElements()) {
                    String dump = element.accept(this);
                    appendIndented(sb, dump);
                }
                sb.append("]\n");
                return sb.toString();
            }

            @Override
            public String caseOr(RegexOr c) {
                StringBuilder sb = new StringBuilder();
                sb.append("or [\n");
                for (RegexPart variant : c.getVariants()) {
                    String dump = variant.accept(this);
                    appendIndented(sb, dump);
                }
                sb.append("]\n");
                return sb.toString();
            }

            @Override
            public String caseQuantifier(RegexQuantifier c) {
                StringBuilder sb = new StringBuilder();
                sb.append("quantifier {").append(c.getMin()).append(",")
                        .append(c.getMax()).append("} [\n");
                String dump = c.getInner().accept(this);
                appendIndented(sb, dump);
                sb.append("]\n");
                return sb.toString();
            }

            @Override
            public String caseSet(RegexSet c) {
                return c.toString();
            }

            @Override
            public String caseEmpty(RegexEmpty c) {
                return "<empty>";
            }

            @Override
            public String caseRange(RegexRange c) {
                // None of the dumps expected by the tests contains a range node.
                fail("unexpected RegexRange node");
                return null;
            }
        });
        assertEquals(expected, actual);
    }

    /**
     * Parses {@code regex} and fails the test with the first error message
     * if the parser reports any problem, or if no tree was produced.
     *
     * @return the non-null root of the parsed tree
     */
    private static RegexPart parseWithoutErrors(String regex) {
        RegexDefTree<RegexAstPart> result = RegexDefTree.parse(new TextSource("input", regex, 1));
        if (result.hasErrors()) {
            fail(result.getErrors().get(0).getMessage());
        }
        RegexPart root = result.getRoot();
        assertNotNull("no tree for `" + regex + "`", root);
        return root;
    }

    /**
     * Appends every line of {@code text} to {@code sb}, prefixed with a tab
     * and terminated with a newline.
     */
    private static void appendIndented(StringBuilder sb, String text) {
        for (String line : text.split("\n")) {
            sb.append('\t').append(line).append('\n');
        }
    }
}