package org.apache.lucene.util.automaton;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util._TestUtil;
public class AutomatonTestUtil {
/**
 * Returns a random {@link RegExp}.
 * <p>
 * Candidate strings are drawn from {@code randomRegexpString} until one
 * both passes {@code UnicodeUtil.validUTF16String} and can be parsed with
 * the {@code RegExp.NONE} flags. Candidates that fail either check are
 * silently discarded and a new one is generated.
 *
 * @param r source of randomness
 * @return a regular expression built from the accepted candidate
 */
public static RegExp randomRegexp(Random r) {
  for (;;) {
    final String candidate = randomRegexpString(r);
    // the generator may produce strings that are not valid UTF-16; skip those
    if (UnicodeUtil.validUTF16String(candidate)) {
      try {
        // NOTE: parse -> toString -> parse again, because RegExp.toString()
        // is being abused here (it is really only meant for debugging)
        final String printed = new RegExp(candidate, RegExp.NONE).toString();
        return new RegExp(printed, RegExp.NONE);
      } catch (Exception e) {
        // candidate could not be turned into a RegExp: retry with a new one
      }
    }
  }
}
/**
 * Builds a random string of 0 to 19 chars to be used as a regexp candidate.
 * <p>
 * Each position is filled according to a random kind in 0..10: a surrogate
 * pair (only when two slots are left), a char from one of several BMP
 * ranges, or one of the regexp operators {@code . ? * + ( )}.
 */
private static String randomRegexpString(Random r) {
  final int length = r.nextInt(20);
  if (length == 0) {
    // zero-length strings are allowed
    return "";
  }
  final char[] chars = new char[length];
  int pos = 0;
  while (pos < length) {
    final int kind = r.nextInt(11);
    if (kind == 0 && pos < length - 1) {
      // room for two chars: emit a surrogate pair (high first, then low)
      chars[pos] = (char) _TestUtil.nextInt(r, 0xd800, 0xdbff);
      chars[pos + 1] = (char) _TestUtil.nextInt(r, 0xdc00, 0xdfff);
      pos += 2;
      continue;
    }
    final char c;
    switch (kind) {
      case 0: // no room left for a surrogate pair: treated like kind 1
      case 1:
        c = (char) r.nextInt(0x80);
        break;
      case 2:
        c = (char) _TestUtil.nextInt(r, 0x80, 0x800);
        break;
      case 3:
        c = (char) _TestUtil.nextInt(r, 0x800, 0xd7ff);
        break;
      case 4:
        c = (char) _TestUtil.nextInt(r, 0xe000, 0xffff);
        break;
      case 5:
        c = '.';
        break;
      case 6:
        c = '?';
        break;
      case 7:
        c = '*';
        break;
      case 8:
        c = '+';
        break;
      case 9:
        c = '(';
        break;
      default: // kind == 10, the only remaining value of nextInt(11)
        c = ')';
        break;
    }
    chars[pos] = c;
    pos++;
  }
  return new String(chars);
}
/**
 * Picks a random code point accepted by the given transition, never
 * returning a surrogate (anything in UNI_SUR_HIGH_START..UNI_SUR_LOW_END).
 * <p>
 * The accepted range [t.min, t.max] is split into the part below the
 * surrogate block and the part above it; a single {@code nextInt} draw
 * over the combined size selects one code point, so every non-surrogate
 * code point of the range is equally likely.
 *
 * @param r source of randomness
 * @param t transition whose [min, max] range is sampled
 * @return a non-surrogate code point with t.min &lt;= code &lt;= t.max
 * @throws IllegalArgumentException if the transition's range contains no
 *         non-surrogate code point
 */
private static int getRandomCodePoint(final Random r, final Transition t) {
  // number of accepted code points strictly below the surrogate block
  final int lastBelow = Math.min(t.max, UnicodeUtil.UNI_SUR_HIGH_START - 1);
  final int belowCount = Math.max(0, lastBelow - t.min + 1);

  // first accepted code point strictly above the surrogate block, and how
  // many accepted code points there are from there up to t.max (inclusive)
  final int firstAbove = Math.max(t.min, UnicodeUtil.UNI_SUR_LOW_END + 1);
  final int aboveCount = Math.max(0, t.max - firstAbove + 1);

  final int total = belowCount + aboveCount;
  if (total == 0) {
    throw new IllegalArgumentException("transition accepts only surrogates: " + t);
  }

  // nextInt's bound is exclusive, so pick is in [0, total - 1]
  final int pick = r.nextInt(total);
  final int code;
  if (pick < belowCount) {
    code = t.min + pick;
  } else {
    code = firstAbove + (pick - belowCount);
  }

  assert code >= t.min && code <= t.max && (code < UnicodeUtil.UNI_SUR_HIGH_START || code > UnicodeUtil.UNI_SUR_LOW_END):
    "code=" + code + " min=" + t.min + " max=" + t.max;
  return code;
}
/**
 * Produces random strings (as code point arrays) that are accepted by an
 * automaton, by walking it from the initial state and choosing
 * transitions at random.
 */
public static class RandomAcceptedStrings {

  // Transitions recorded by the backwards breadth-first search from the
  // accept states (used as an identity set); null for singleton automata.
  private final Map<Transition,Boolean> leadsToAccept;
  private final Automaton a;

  /** A transition paired with the state it leaves from. */
  private static class ArrivingTransition {
    final State from;
    final Transition t;
    public ArrivingTransition(State from, Transition t) {
      this.from = from;
      this.t = t;
    }
  }

  /**
   * Prepares random string generation for the given automaton. Unless the
   * automaton is a singleton, this precomputes which transitions lie on
   * the breadth-first paths back from the accept states.
   */
  public RandomAcceptedStrings(Automaton a) {
    this.a = a;
    this.leadsToAccept = a.isSingleton() ? null : findTransitionsLeadingToAccept(a);
  }

  /**
   * Runs a breadth-first search backwards from all accept states and
   * returns the transitions through which each state was first reached.
   */
  private static Map<Transition,Boolean> findTransitionsLeadingToAccept(Automaton automaton) {
    // must be an IdentityHashMap because two Transitions with different
    // start nodes can be considered the same
    final Map<Transition,Boolean> marked = new IdentityHashMap<Transition,Boolean>();
    final Map<State,List<ArrivingTransition>> incoming = new HashMap<State,List<ArrivingTransition>>();
    final LinkedList<State> frontier = new LinkedList<State>();
    final Set<State> visited = new HashSet<State>();

    // build the reverse map (target state -> arriving transitions) and
    // seed the search with every accept state
    for (State state : automaton.getNumberedStates()) {
      for (int i = 0; i < state.numTransitions; i++) {
        final Transition out = state.transitionsArray[i];
        List<ArrivingTransition> arrivals = incoming.get(out.to);
        if (arrivals == null) {
          arrivals = new ArrayList<ArrivingTransition>();
          incoming.put(out.to, arrivals);
        }
        arrivals.add(new ArrivingTransition(state, out));
      }
      if (state.accept) {
        frontier.addLast(state);
        visited.add(state);
      }
    }

    // walk backwards; a state is expanded only the first time it is seen
    while (!frontier.isEmpty()) {
      final State target = frontier.removeFirst();
      final List<ArrivingTransition> arrivals = incoming.get(target);
      if (arrivals == null) {
        continue;
      }
      for (ArrivingTransition arrival : arrivals) {
        // Set.add returns true only when the state was not yet visited
        if (visited.add(arrival.from)) {
          frontier.addLast(arrival.from);
          marked.put(arrival.t, Boolean.TRUE);
        }
      }
    }
    return marked;
  }

  /**
   * Returns the code points of a randomly chosen string accepted by the
   * automaton.
   *
   * @param r source of randomness
   * @throws RuntimeException if the walk reaches a non-accepting state
   *         that has no outgoing transitions
   */
  public int[] getRandomAcceptedString(Random r) {
    final List<Integer> codePoints = new ArrayList<Integer>();

    if (a.isSingleton()) {
      // only one string is accepted: decode it into code points
      final String only = a.singleton;
      for (int pos = 0; pos < only.length(); ) {
        final int cp = only.codePointAt(pos);
        pos += Character.charCount(cp);
        codePoints.add(cp);
      }
    } else {
      State current = a.initial;
      // at an accept state: stop if there is nowhere left to go,
      // otherwise flip a coin to decide whether to stop here
      while (!(current.accept && (current.numTransitions == 0 || r.nextBoolean()))) {
        if (current.numTransitions == 0) {
          throw new RuntimeException("this automaton has dead states");
        }
        final Transition chosen = chooseTransition(r, current);
        codePoints.add(getRandomCodePoint(r, chosen));
        current = chosen.to;
      }
    }
    return ArrayUtil.toIntArray(codePoints);
  }

  /**
   * Chooses an outgoing transition of the given state. Half of the time
   * ("cheating") the choice is restricted to transitions known to be on a
   * fastest path to an accept state, when the state has any.
   */
  private Transition chooseTransition(Random r, State state) {
    final boolean cheat = r.nextBoolean();
    if (cheat) {
      final List<Transition> shortcuts = new ArrayList<Transition>();
      for (int i = 0; i < state.numTransitions; i++) {
        final Transition candidate = state.transitionsArray[i];
        if (leadsToAccept.containsKey(candidate)) {
          shortcuts.add(candidate);
        }
      }
      if (!shortcuts.isEmpty()) {
        return shortcuts.get(r.nextInt(shortcuts.size()));
      }
      // no shortcut here is OK -- it means we jumped into a cycle;
      // fall through to an unrestricted pick
    }
    return state.transitionsArray[r.nextInt(state.numTransitions)];
  }
}
}