/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util.automaton;
import org.apache.lucene.util.LuceneTestCase;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.ObjectInput;
import java.io.ObjectInputStream;
import java.io.ObjectOutput;
import java.io.ObjectOutputStream;
/**
 * Tests for {@link RegExp}: basic matching, the failure mode for patterns that
 * are too expensive to determinize, and repetition operators applied to
 * degenerate operands.
 */
public class TestRegExp extends LuceneTestCase {

  /**
   * Simple smoke test for regular expression.
   */
  public void testSmoke() {
    RegExp r = new RegExp("a(b+|c+)d");
    Automaton a = r.toAutomaton();
    assertTrue(a.isDeterministic());
    CharacterRunAutomaton run = new CharacterRunAutomaton(a);
    assertTrue(run.run("abbbbbd"));
    assertTrue(run.run("acd"));
    // At least one 'b' or 'c' is required between 'a' and 'd'.
    assertFalse(run.run("ad"));
  }

  /**
   * Compiles a regular expression that is prohibitively expensive to
   * determinize and expects to catch an exception for it.
   */
  public void testDeterminizeTooManyStates() {
    // LUCENE-6046
    expectTooComplex("[ac]*a[ac]{50,200}");
  }

  /**
   * Verifies that a {@link TooComplexToDeterminizeException} survives a Java
   * serialization round trip and still carries a message afterwards.
   */
  // LUCENE-6713
  public void testSerializeTooManyStatesToDeterminizeExc() throws Exception {
    TooComplexToDeterminizeException expected = expectTooComplex("[ac]*a[ac]{50,200}");

    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    try (ObjectOutput out = new ObjectOutputStream(bos)) {
      out.writeObject(expected);
    }
    // Snapshot the bytes only after the object stream has been closed, so
    // everything it buffered is guaranteed to have reached the byte array.
    byte[] bytes = bos.toByteArray();

    try (ObjectInput in = new ObjectInputStream(new ByteArrayInputStream(bytes))) {
      TooComplexToDeterminizeException e2 = (TooComplexToDeterminizeException) in.readObject();
      assertNotNull(e2.getMessage());
    }
  }

  /**
   * Applies a bounded repeat on top of a starred character class and checks
   * that an automaton is still produced within the 1000 state limit passed to
   * {@code toAutomaton}.
   */
  // LUCENE-6046
  public void testRepeatWithEmptyString() throws Exception {
    Automaton a = new RegExp("[^y]*{1,2}").toAutomaton(1000);
    // paranoia:
    assertTrue(a.toString().length() > 0);
  }

  /**
   * Applies each repetition operator to {@code #} (presumably the
   * empty-language operator, going by the test name) and checks that an
   * automaton is produced for every variant.
   */
  public void testRepeatWithEmptyLanguage() throws Exception {
    String[] patterns = {"#*", "#+", "#{2,10}", "#?"};
    for (String pattern : patterns) {
      Automaton a = new RegExp(pattern).toAutomaton(1000);
      // paranoia:
      assertTrue(a.toString().length() > 0);
    }
  }

  /**
   * Asserts that converting {@code source} to an automaton fails with a
   * {@link TooComplexToDeterminizeException} whose message mentions the
   * offending pattern.
   *
   * @param source the regular expression to compile
   * @return the caught exception, for further inspection by the caller
   */
  private TooComplexToDeterminizeException expectTooComplex(String source) {
    TooComplexToDeterminizeException expected =
        expectThrows(TooComplexToDeterminizeException.class, () -> {
          new RegExp(source).toAutomaton();
        });
    assertTrue(expected.getMessage().contains(source));
    return expected;
  }
}