package hu.u_szeged.nlp.pos.rfst;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.StringTokenizer;
public class RFSA {
// Number of states; sizes ab and (plus one) indices.
protected int stateCount;
// Number of edges; sizes targets, symbols and charsymbols.
protected int edgeCount;
// State from which analyse(char[]) starts.
protected int startingState;
// ab[s] is true when state s is accepting.
protected boolean[] ab;
// The edges of state s occupy positions [indices[s], indices[s + 1]) of the
// edge arrays. Entries are -1 until addEdge/noedge assigns them;
// indices[stateCount] is preset to edgeCount.
protected int[] indices;
// targets[e] is the target state of edge e.
protected int[] targets;
protected String[] symbols; // symbols[e]: label of edge e minus its first character (the output part)
// charsymbols[e] is the first character of the label of edge e (the input
// character compared against the analysed word).
protected char[] charsymbols;
// Source state of the most recent addEdge/noedge call; later calls must not
// use a smaller source.
protected int a;
// Next free position in the edge arrays; advanced by addEdge.
protected int at;
// Set to true by sort().
protected boolean sorted;
// symbolhistory[pos] is the output string with which the recursive analyse
// most recently reached input position pos; reallocated by analyse(char[]).
protected String[] symbolhistory;
/**
 * Callback handed to {@link #binarySearch(int, char, Processor)}; invoked once
 * per matching edge.
 */
public static interface Processor {
/**
 * Handles one match.
 * NOTE(review): binarySearch passes the matching edge's position in the edge
 * arrays here, although the parameter is named {@code state}.
 */
void process(int state);
}
/**
 * Allocates an empty automaton with room for the given number of states and
 * edges. Every per-state start index is initialised to -1 ("not assigned
 * yet"); the sentinel slot after the last state is set to {@code edgeCount}.
 *
 * @param startingState state at which analysis begins
 * @param stateCount    number of states
 * @param edgeCount     number of edges
 */
public RFSA(int startingState, int stateCount, int edgeCount) {
    this.startingState = startingState;
    this.stateCount = stateCount;
    this.edgeCount = edgeCount;

    // Per-state flags first, then the three parallel per-edge arrays.
    this.ab = new boolean[stateCount];
    this.targets = new int[edgeCount];
    this.symbols = new String[edgeCount];
    this.charsymbols = new char[edgeCount];

    // One extra slot so that indices[s + 1] is valid for the last state.
    this.indices = new int[stateCount + 1];
    for (int s = 0; s < this.indices.length; s++) {
        this.indices[s] = -1;
    }
    this.indices[stateCount] = edgeCount;
}
/**
 * Checks that no state has two outgoing edges carrying the same full label
 * (input character plus output, as produced by {@link #outgoing(int)}).
 *
 * @return {@code true} if every state's outgoing labels are pairwise
 *         distinct, {@code false} as soon as a duplicate is found
 */
public boolean adeterministic() {
    for (int s : allStates()) {
        // Labels already seen on edges leaving s.
        Collection<String> seen = new HashSet<String>();
        for (Pair<String, Integer> p : outgoing(s)) {
            // add() returns false when the label was already present.
            if (!seen.add(p.getA())) {
                return false;
            }
        }
    }
    return true;
}
/**
 * Always returns {@code true}; the argument is not inspected.
 * NOTE(review): presumably a hook for subclasses to restrict accepted input — confirm.
 */
public boolean legal(String s) {
return true;
}
/**
 * Finds every edge of state {@code q} whose input character equals {@code c}
 * and hands the position of each such edge (in the edge arrays) to
 * {@code p}, in ascending order. Does nothing when there is no match.
 *
 * The edges of {@code q} must already be ordered by input character (see
 * {@link #sort()}); this is not checked here.
 *
 * @param q state whose outgoing edges are searched
 * @param c input character to look for
 * @param p callback receiving each matching edge position
 */
public void binarySearch(int q, char c, Processor p) {
    final int from = indices[q];
    final int to = indices[q + 1];
    int low = from;
    int high = to - 1;
    while (low <= high) {
        // Unsigned shift keeps the midpoint correct even if low + high overflows.
        int mid = (low + high) >>> 1;
        char probe = charsymbols[mid];
        if (probe < c) {
            low = mid + 1;
        } else if (probe > c) {
            high = mid - 1;
        } else {
            // Several edges may share the character: widen to the whole run.
            int first = mid;
            while (first > from && charsymbols[first - 1] == c) {
                first--;
            }
            int end = mid + 1;
            while (end < to && charsymbols[end] == c) {
                end++;
            }
            for (int edge = first; edge < end; edge++) {
                p.process(edge);
            }
            return;
        }
    }
}
// assume sorted!
/**
 * Lower-cases {@code s} and analyses it from the starting state.
 *
 * The lower-casing uses {@link Locale#ROOT} so the result does not depend on
 * the JVM's default locale (e.g. the Turkish dotless-i mapping).
 *
 * @param s the word to analyse
 * @return the output strings of all accepting paths that consume the whole
 *         lower-cased word
 */
public Collection<String> analyse(String s) {
    char[] ac = s.toLowerCase(Locale.ROOT).toCharArray();
    return analyse(ac);
}
/**
 * Analyses the given characters from the starting state with an empty
 * initial output. Replaces {@code symbolhistory} with a fresh array of
 * {@code ac.length + 1} slots before the search starts.
 *
 * @param ac input characters, used as given (no case folding here)
 * @return the outputs collected by the recursive search, in discovery order
 */
public Collection<String> analyse(char[] ac) {
    symbolhistory = new String[ac.length + 1];
    Collection<String> results = new ArrayList<String>();
    analyse(startingState, ac, 0, "", results);
    return results;
}
// binary search
/**
 * Recursive step of the analysis using binary search over the edges of
 * {@code q}, which must be ordered by input character (see {@link #sort()}).
 *
 * Records {@code symbol} in {@code symbolhistory[pos]}. When the whole input
 * has been consumed, {@code symbol} is added to {@code analyses} if
 * {@code q} is accepting. Otherwise every edge of {@code q} whose input
 * character equals {@code ac[pos]} is followed, in ascending edge order,
 * with that edge's output appended to {@code symbol}.
 *
 * @param q        current state
 * @param ac       input characters
 * @param pos      number of input characters consumed so far
 * @param symbol   output accumulated so far
 * @param analyses collector for the outputs of accepting paths
 */
public void analyse(int q, char[] ac, int pos, String symbol, Collection<String> analyses) {
    symbolhistory[pos] = symbol;
    if (pos == ac.length) {
        if (ab[q]) {
            analyses.add(symbol);
        }
        return;
    }
    final char c = ac[pos];
    final int from = indices[q];
    final int to = indices[q + 1];
    int low = from;
    int high = to - 1;
    while (low <= high) {
        // Unsigned shift keeps the midpoint correct even if low + high overflows.
        int mid = (low + high) >>> 1;
        char probe = charsymbols[mid];
        if (probe < c) {
            low = mid + 1;
        } else if (probe > c) {
            high = mid - 1;
        } else {
            // Several edges may share the character: widen to the whole run.
            int first = mid;
            while (first > from && charsymbols[first - 1] == c) {
                first--;
            }
            int end = mid + 1;
            while (end < to && charsymbols[end] == c) {
                end++;
            }
            for (int edge = first; edge < end; edge++) {
                analyse(targets[edge], ac, pos + 1, symbol + symbols[edge], analyses);
            }
            return;
        }
    }
}
// linear search
/**
 * Recursive step of the analysis using a linear scan over the edges of
 * {@code q}; unlike the binary-search variant it does not need the edges to
 * be sorted.
 *
 * The recursion stays in this method. Previously it fell through to the
 * binary-search {@code analyse}, which made the result depend on edge order
 * and required {@code symbolhistory} to be allocated. This method does not
 * touch {@code symbolhistory}.
 *
 * @param q        current state
 * @param ac       input characters
 * @param pos      number of input characters consumed so far
 * @param symbol   output accumulated so far
 * @param analyses collector for the outputs of accepting paths
 */
public void analyse1(int q, char[] ac, int pos, String symbol, Collection<String> analyses) {
    if (pos == ac.length) {
        if (ab[q]) {
            analyses.add(symbol);
        }
        return;
    }
    final char c = ac[pos];
    final int end = indices[q + 1];
    for (int edge = indices[q]; edge < end; edge++) {
        if (charsymbols[edge] == c) {
            analyse1(targets[edge], ac, pos + 1, symbol + symbols[edge], analyses);
        }
    }
}
/**
 * Records whether state {@code s} is accepting. No storage is allocated here;
 * {@code s} must be a valid index into the array sized by the constructor.
 *
 * @param s         state number
 * @param accepting whether the state is accepting
 */
public void addState(int s, boolean accepting) {
ab[s] = accepting;
}
/**
 * Appends an edge at the next free position of the edge arrays. The first
 * character of {@code label} is stored as the edge's input character, the
 * rest as its output. The first edge added for a state also fixes where
 * that state's edge range starts.
 *
 * @param source source state; must not be smaller than the source of the
 *               previous {@code addEdge}/{@code noedge} call
 * @param label  input character followed by the output string
 * @param target target state
 * @throws IllegalArgumentException if {@code source} is smaller than the
 *         previously used source
 */
public void addEdge(int source, String label, int target) {
    if (a > source) {
        throw new IllegalArgumentException();
    }
    final int edge = at;
    if (indices[source] == -1) {
        indices[source] = edge;
    }
    charsymbols[edge] = label.charAt(0);
    symbols[edge] = label.substring(1);
    targets[edge] = target;
    a = source;
    at = edge + 1;
}
/**
 * Builds a lemma string from an analysis string: the input is split at
 * {@code '+'}; from each piece a leading {@code '$'} and everything from the
 * first {@code '/'} onwards are dropped; the remainders are concatenated and
 * every {@code '@'} is removed.
 *
 * @param symbol analysis string
 * @return the concatenated pieces with {@code '@'} removed
 */
public String getKRLemma(String symbol) {
    StringBuilder stem = new StringBuilder();
    for (String morph : symbol.split("\\+")) {
        int begin = morph.startsWith("$") ? 1 : 0;
        int slash = morph.indexOf('/');
        int end = (slash < 0) ? morph.length() : slash;
        stem.append(morph.substring(begin, end));
    }
    return stem.toString().replace("@", "");
}
/**
 * If {@code symbol} contains {@code pos}, returns the part of {@code symbol}
 * before the first occurrence of {@code pos}, followed by {@code pos} up to
 * (not including) its first {@code '['}. Otherwise returns {@code symbol}
 * unchanged.
 *
 * @param symbol analysis string
 * @param pos    tag to look for; expected to contain {@code '['}
 * @return the truncated string, or {@code symbol} itself when the tag is absent
 */
protected String getLastPOS(String symbol, String pos) {
    int tagStart = symbol.indexOf(pos);
    if (tagStart < 0) {
        return symbol;
    }
    String prefix = symbol.substring(0, tagStart);
    String bareTag = pos.substring(0, pos.indexOf("["));
    return prefix + bareTag;
}
/**
 * Derives a lemma for the analysis held in the last slot of
 * {@code symbolhistory}, choosing between a prefix of the input word and the
 * lemma computed by {@link #getKRLemma(String)}.
 *
 * Relies on {@code symbolhistory} as filled by the recursive {@code analyse}.
 *
 * @param ac the analysed input characters
 * @return the getKRLemma result if it is at least as long as the input-prefix
 *         candidate, otherwise the input-prefix candidate
 */
protected String getMSDLemma(char[] ac) {
// Output string recorded for the last input position.
String symbol = symbolhistory[symbolhistory.length - 1];
String POS = symbol;
// For each tag below, in this order: if present, cut POS at the tag's first
// occurrence and append the tag without its bracketed part (see getLastPOS).
POS = getLastPOS(POS, "/ADJ[COMPAR]");
POS = getLastPOS(POS, "/ADJ[SUPERLAT]");
POS = getLastPOS(POS, "/ADJ[SUPERSUPERLAT]");
POS = getLastPOS(POS, "/ADJ[MANNER]");
POS = getLastPOS(POS, "/NOUN[ESS_FOR]");
// p: last '/' in POS; pp: first '<' at or after p.
int p = POS.lastIndexOf('/');
int pp = POS.indexOf('<', p);
// NOTE(review): pp is an index into POS but is applied to symbol; the two
// strings differ whenever getLastPOS rewrote POS above — confirm intended.
if (pp > 0)
POS = symbol.substring(0, pp);
// Find the first recorded output that starts with POS, or that has no '/'
// and equals POS up to its last '/'.
// NOTE(review): i has no upper bound; if no entry matches, the loop indexes
// past the end of symbolhistory.
int i = 0;
while (!(symbolhistory[i].startsWith(POS) || (!symbolhistory[i].contains("/") && symbolhistory[i].equals(POS.substring(0, p))))) {
++i;
}
// Candidate taken from the input: its first i characters.
String szoalak_szoto = new String(ac).substring(0, i);
// Superlative tag: skip the first 3 input characters (the original note here
// reads "leg...", presumably the prefix being skipped).
if (symbol.contains("/ADJ[SUPERLAT]")) {
szoalak_szoto = new String(ac).substring(3, i);
}
// "legesleg" prefix with the SUPERSUPERLAT tag: skip the first 8 input characters.
if ((symbol.contains("/ADJ[SUPERSUPERLAT]")) && szoalak_szoto.startsWith("legesleg")) {
szoalak_szoto = new String(ac).substring(8, i);
}
// Candidate computed from the output recorded at position i.
String KR_szoto = getKRLemma(symbolhistory[i]);
// Prefer the longer candidate; ties go to the getKRLemma result.
String MSDszoto = KR_szoto.length() >= szoalak_szoto.length() ? KR_szoto : szoalak_szoto;
return MSDszoto;
}
/**
 * Declares that {@code source} has no outgoing edges by pointing its edge
 * range at the current fill position, then remembers {@code source} as the
 * most recently handled state.
 *
 * @param source state without edges; must not be smaller than the source of
 *               the previous {@code addEdge}/{@code noedge} call
 * @throws IllegalArgumentException if {@code source} is smaller than the
 *         previously used source
 */
public void noedge(int source) {
    if (a > source) {
        throw new IllegalArgumentException();
    }
    final int emptyRangeStart = at;
    indices[source] = emptyRangeStart;
    a = source;
}
/**
 * Sets the accepting flag of {@code state} to {@code b}.
 *
 * @param state state number
 * @param b     new value of the accepting flag
 */
public void setAccepting(int state, boolean b) {
ab[state] = b;
}
/**
 * @return the starting state given to the constructor
 */
public int startingState() {
return startingState;
}
/**
 * Returns a new {@code StateIterator} built from {@code stateCount}.
 * NOTE(review): StateIterator is declared outside this file; presumably it
 * yields the state numbers 0 .. stateCount - 1 — confirm.
 */
public Iterable<Integer> allStates() {
return new StateIterator(stateCount);
}
/**
 * @param state state number
 * @return the accepting flag stored for {@code state}
 */
public boolean isAccepting(int state) {
return ab[state];
}
/**
 * Returns the outgoing edges of {@code state} as (label, target) pairs, where
 * the label is the edge's input character followed by its output string.
 *
 * @param state state number
 * @return a fresh {@link EdgeIterable} over the edges of {@code state}
 */
public Iterable<Pair<String, Integer>> outgoing(int state) {
return new EdgeIterable(state);
}
// The edges of state s occupy positions [indices[s], indices[s + 1])
// of the edge arrays (targets, symbols, charsymbols).
/**
 * Returns the number of outgoing edges of state {@code s}, computed as the
 * distance between its start index and the next state's start index.
 *
 * @param s state number
 * @return number of edges leaving {@code s}
 * @throws IllegalArgumentException if {@code s} is not below the state count
 */
public int size(int s) {
    if (s < stateCount) {
        return indices[s + 1] - indices[s];
    }
    throw new IllegalArgumentException(s + " >= " + stateCount);
}
/**
 * @return the number of states given to the constructor
 */
public int stateCount() {
return stateCount;
}
/**
 * @return the runtime class's simple name followed by
 *         {@code [stateCount, edgeCount]}
 */
@Override
public String toString() {
    StringBuilder text = new StringBuilder(getClass().getSimpleName());
    text.append("[").append(stateCount).append(", ").append(edgeCount).append("]");
    return text.toString();
}
/**
 * Renders the whole automaton as text: a header line with state count, edge
 * count and starting state; then, per state, a line with its number,
 * accepting flag and edge count, followed by one line per edge showing the
 * target, input character and output.
 *
 * @return the multi-line dump
 */
public String toDetailedString() {
    StringBuilder out = new StringBuilder();
    out.append(" ").append(stateCount)
       .append(", ").append(edgeCount)
       .append(", ").append(startingState)
       .append("\n");
    for (int state = 0; state < stateCount; state++) {
        out.append(" ").append(state)
           .append(", ").append(ab[state])
           .append(", ").append(indices[state + 1] - indices[state])
           .append("\n");
        for (int edge = indices[state]; edge < indices[state + 1]; edge++) {
            out.append(" ").append(targets[edge])
               .append(": >").append(charsymbols[edge])
               .append("|").append(symbols[edge])
               .append("<\n");
        }
    }
    return out.toString();
}
/**
 * Cursor over the edge range of one state. Holds the state, the size and
 * start of its range, and the position of the next edge to visit. Subclasses
 * supply the element-producing {@code next()}.
 */
public class EdgeIter {
    /** State whose edges are visited. */
    protected int state;
    /** Number of edges of that state. */
    protected int size;
    /** Position of the state's first edge in the edge arrays. */
    protected int start;
    /** Position of the edge to be visited next. */
    protected int next;

    /**
     * Positions the cursor on the first edge of {@code state}.
     *
     * @param state state whose edges are visited
     */
    public EdgeIter(int state) {
        this.state = state;
        this.size = RFSA.this.size(state);
        this.start = indices[state];
        this.next = this.start;
    }

    /**
     * @return {@code true} while the cursor is still inside the state's range
     */
    public boolean hasNext() {
        int end = start + size;
        return next < end;
    }

    /**
     * Removal is not supported.
     *
     * @throws UnsupportedOperationException always
     */
    public void remove() {
        throw new UnsupportedOperationException();
    }
}
/**
 * Iterates over the outgoing edges of one state as (label, target) pairs.
 * Acts both as the {@code Iterable} and as an {@code Iterator};
 * {@link #iterator()} hands out a fresh instance for the same state.
 */
public class EdgeIterable extends EdgeIter implements Iterable<Pair<String, Integer>>, Iterator<Pair<String, Integer>> {

    /**
     * @param state state whose edges are iterated
     */
    public EdgeIterable(int state) {
        super(state);
    }

    /**
     * @return a new, independent iterator over the same state's edges
     */
    public Iterator<Pair<String, Integer>> iterator() {
        return new EdgeIterable(state);
    }

    /**
     * Returns the next edge and advances the cursor.
     *
     * @return pair of full label (input character followed by output) and target state
     * @throws NoSuchElementException when all edges have been returned
     */
    public Pair<String, Integer> next() {
        if (hasNext()) {
            final int edge = next;
            int target = targets[edge];
            String label = charsymbols[edge] + symbols[edge];
            next = edge + 1;
            return new Pair<String, Integer>(label, target);
        }
        throw new NoSuchElementException();
    }
}
/**
 * Factory used by {@link #sort()} to obtain the sorter for one state;
 * protected, so subclasses can supply a different {@link Sorter}.
 *
 * @param state state whose edges are to be sorted
 * @return a new sorter for {@code state}
 */
protected Sorter createSorter(int state) {
return new Sorter(state);
}
/**
 * Marks the automaton as sorted and then sorts the edges of every state
 * that has at least one edge, using the sorter from
 * {@link #createSorter(int)}.
 */
public void sort() {
    sorted = true;
    for (int state = 0; state < stateCount; state++) {
        boolean hasEdges = indices[state] != indices[state + 1];
        if (hasEdges) {
            createSorter(state).sort();
        }
    }
}
/**
 * Reorders the edges of a single state by input character, keeping the three
 * parallel edge arrays in step.
 */
public class Sorter {
    /** State whose edges are sorted. */
    protected int state;
    /** Number of edges of that state; set by {@link #sort()}. */
    protected int length;

    /**
     * @param state state whose edges are to be sorted
     */
    public Sorter(int state) {
        this.state = state;
    }

    /**
     * Sorts the state's edge range by input character. Edges with the same
     * input character keep their relative order, because
     * {@code Arrays.sort} on object arrays is stable.
     */
    public void sort() {
        length = indices[state + 1] - indices[state];
        final int base = indices[state];

        // Snapshot the range; the live arrays are overwritten below.
        String[] outputs = new String[length];
        final char[] inputs = new char[length];
        int[] edgeTargets = new int[length];
        System.arraycopy(charsymbols, base, inputs, 0, length);
        System.arraycopy(symbols, base, outputs, 0, length);
        System.arraycopy(targets, base, edgeTargets, 0, length);

        // Sort a permutation of range-local offsets by input character.
        Integer[] order = new Integer[length];
        for (int k = 0; k < length; k++) {
            order[k] = k;
        }
        Arrays.sort(order, new Comparator<Integer>() {
            public int compare(Integer left, Integer right) {
                return inputs[left] - inputs[right];
            }
        });

        // Write the snapshot back in sorted order.
        for (int k = 0; k < length; k++) {
            int from = order[k];
            charsymbols[base + k] = inputs[from];
            symbols[base + k] = outputs[from];
            targets[base + k] = edgeTargets[from];
        }
    }
}
/**
 * Counts the states from which an accepting state can be reached (accepting
 * states included). Starts from the accepting states and repeatedly adds
 * every state that has an edge into the set, until a full pass adds nothing.
 * Prints the initial number of accepting states to standard output.
 *
 * @return the number of such states
 */
public int valid() {
    Collection<Integer> reaching = new HashSet<Integer>();
    for (int s = 0; s < stateCount; s++) {
        if (ab[s]) {
            reaching.add(s);
        }
    }
    System.out.println(getClass().getSimpleName() + ": valid starts with " + reaching.size() + " accepting states");

    boolean grew;
    do {
        grew = false;
        for (int s = 0; s < stateCount; s++) {
            if (!reaching.contains(s)) {
                boolean leadsIn = false;
                for (int edge = indices[s]; edge < indices[s + 1] && !leadsIn; edge++) {
                    leadsIn = reaching.contains(targets[edge]);
                }
                if (leadsIn) {
                    reaching.add(s);
                    grew = true;
                }
            }
        }
    } while (grew);
    return reaching.size();
}
/**
 * Writes the automaton to {@code file} as UTF-8 text, in the layout that
 * {@link #read(InputStream, String)} parses: a header line
 * {@code startingState TAB stateCount TAB edgeCount}; then, per state, a line
 * {@code state TAB accepting}, a line with the edge count, and one line
 * {@code label TAB target} per edge.
 *
 * The writer is closed even when writing fails. Because {@code PrintWriter}
 * swallows I/O errors, its error flag is checked and turned into an exception.
 *
 * @param file path of the file to write
 * @throws IOException if the file cannot be opened or a write error occurred
 */
public void print(String file) throws IOException {
    PrintWriter pw = new PrintWriter(new OutputStreamWriter(new FileOutputStream(file), "UTF-8"));
    try {
        pw.println(startingState + "\t" + stateCount + "\t" + edgeCount);
        for (int i = 0; i < stateCount; i++) {
            pw.println(i + "\t" + ab[i]);
            pw.println(indices[i + 1] - indices[i]);
            for (int j = indices[i]; j < indices[i + 1]; j++) {
                pw.println(charsymbols[j] + symbols[j] + "\t" + targets[j]);
            }
        }
        // checkError() flushes and reports any error swallowed so far.
        if (pw.checkError()) {
            throw new IOException("error while writing " + file);
        }
    } finally {
        pw.close();
    }
}
/**
 * @return the source state passed to the most recent addEdge/noedge call
 */
public int getA() {
return a;
}
/**
 * @return the internal per-state accepting flags (the array itself, not a copy)
 */
public boolean[] getAb() {
return ab;
}
/**
 * @return the next free position in the edge arrays, i.e. the number of
 *         edges added so far through addEdge
 */
public int getAt() {
return at;
}
/**
 * @return the internal per-edge input characters (the array itself, not a copy)
 */
public char[] getCharsymbols() {
return charsymbols;
}
/**
 * @return the edge count given to the constructor
 */
public int getEdgeCount() {
return edgeCount;
}
/**
 * @return the internal per-state start positions into the edge arrays (the
 *         array itself, not a copy)
 */
public int[] getIndices() {
return indices;
}
/**
 * @return {@code true} once {@link #sort()} has been called
 */
public boolean isSorted() {
return sorted;
}
/**
 * @return the starting state given to the constructor (same value as
 *         {@link #startingState()})
 */
public int getStartingState() {
return startingState;
}
/**
 * @return the state count given to the constructor (same value as
 *         {@link #stateCount()})
 */
public int getStateCount() {
return stateCount;
}
/**
 * @return the internal per-edge output strings (the array itself, not a copy)
 */
public String[] getSymbols() {
return symbols;
}
/**
 * @return the internal per-edge target states (the array itself, not a copy)
 */
public int[] getTargets() {
return targets;
}
/**
 * Reads an automaton from text in the layout written by
 * {@link #print(String)} and sorts its edges.
 *
 * Layout: a header line with starting state, state count and edge count;
 * then, per state, a line {@code state TAB accepting}, a line with the
 * number of edges, and one line {@code label TAB target} per edge.
 *
 * The stream is closed when this method returns, whether it succeeds or fails.
 *
 * @param rfsaStream source of the automaton description
 * @param encoding   character encoding of the stream
 * @return the automaton, with edges sorted
 * @throws IOException if reading fails or the input ends prematurely
 * @throws IllegalStateException if an edge line has no label before a tab
 */
public static RFSA read(InputStream rfsaStream, String encoding) throws IOException {
    LineNumberReader reader = new LineNumberReader(new InputStreamReader(rfsaStream, encoding));
    try {
        // Maps each label to the first String instance seen for it, so that
        // repeated labels share one instance.
        Map<String, String> labelMap = new HashMap<String, String>();

        StringTokenizer st = new StringTokenizer(readRequiredLine(reader));
        int startIndex = Integer.parseInt(st.nextToken());
        int stateCount = Integer.parseInt(st.nextToken());
        int edgeCount = Integer.parseInt(st.nextToken());
        RFSA rfsa = new RFSA(startIndex, stateCount, edgeCount);

        for (int i = 0; i < stateCount; i++) {
            // state line with state number and accepting
            st = new StringTokenizer(readRequiredLine(reader), "\t");
            int state = Integer.parseInt(st.nextToken());
            boolean accepting = Boolean.parseBoolean(st.nextToken());
            rfsa.addState(state, accepting);

            // line with edgecount
            st = new StringTokenizer(readRequiredLine(reader));
            int edges = Integer.parseInt(st.nextToken());
            if (edges == 0) {
                rfsa.noedge(state);
            }

            // lines with edges
            for (int j = 0; j < edges; j++) {
                String line = readRequiredLine(reader);
                int index = line.indexOf('\t');
                if (index <= 0) {
                    // No tab at all, or nothing before it.
                    throw new IllegalStateException(
                            "malformed edge on line " + reader.getLineNumber() + ": " + line);
                }
                String s = line.substring(0, index);
                int target = Integer.parseInt(line.substring(index + 1));
                String label = labelMap.get(s);
                if (label == null) {
                    label = s;
                    labelMap.put(s, label);
                }
                rfsa.addEdge(state, label, target);
            }
        }
        rfsa.sort();
        return rfsa;
    } finally {
        reader.close();
    }
}

/** Reads the next line, failing with the line number if the input has ended. */
private static String readRequiredLine(LineNumberReader reader) throws IOException {
    String line = reader.readLine();
    if (line == null) {
        throw new IOException("unexpected end of input after line " + reader.getLineNumber());
    }
    return line;
}
/**
 * Not implemented: ignores its argument and always returns {@code null}.
 * NOTE(review): the intended meaning of {@code defaultRfsa} (file path,
 * resource name, ...) cannot be told from this file — confirm before
 * implementing or calling.
 */
public static RFSA read(String defaultRfsa) {
// TODO Auto-generated method stub
return null;
}
}