package org.basex.index.ft;
import org.basex.util.Token;
import org.basex.util.list.IntArrayList;
import org.basex.util.list.TokenList;
/**
 * This class is used for temporarily storing data on full-text trie nodes
 * while the trie is being built.
 * Every node is an {@code int[]} entry in {@link #next}, laid out as follows:
 *
 * <ul>
 *   <li>Structure: {@code [t, n1, ..., nk, s, p]} or
 *     {@code [t, n1, ..., nk, s, p0, p1]}</li>
 *   <li>{@code t}: index of the node's token in {@link #tokens}
 *     ({@code -1} for the root, which has no token)</li>
 *   <li>{@code n1, ..., nk}: the children of the node, saved as indexes
 *     into {@link #next}</li>
 *   <li>{@code s}: size of the data belonging to the node</li>
 *   <li>{@code p}: file offset of the data belonging to the node</li>
 * </ul>
 *
 * If the offset does not fit into an {@code int}, it is split into two
 * integers: {@code p0} holds bits 16-39 and {@code p1} holds the negated
 * lower 16 bits. A negative last entry is what marks the two-integer layout
 * (see {@link #pointer(int)}).
 * {@code t}, {@code s} and {@code p} are saved for every node.
 *
 * <p>NOTE(review): if the lower 16 bits of a split offset are all zero,
 * {@code p1} is {@code 0} and the entry cannot be told apart from the
 * single-integer layout; offset bits above bit 39 are dropped. Confirm
 * whether such offsets can occur before relying on the split layout.</p>
 *
 * @author BaseX Team 2005-12, BSD License
 * @author Sebastian Gath
 */
final class FTTrieArray {
  /** List saving the token values. */
  final TokenList tokens;
  /** Next pointers; one {@code int[]} entry per trie node. */
  final IntArrayList next;

  /**
   * Constructor.
   * @param is index size, number of tokens to index
   */
  FTTrieArray(final int is) {
    next = new IntArrayList(is);
    tokens = new TokenList(is);
    // add root node: t = -1 (no token), s = 0, p = 0
    next.add(new int[] { -1, 0, 0 });
  }

  /**
   * Bulk loader: inserts a token into the trie. The tokens have to be
   * sorted first.
   * @param v value, which is to be inserted
   * @param s size of the data the node will have
   * @param off file offset where to read the data
   */
  void insertSorted(final byte[] v, final int s, final long off) {
    // offsets beyond the int range are split into two integers; the second
    // one is negated so that the two-integer layout can be recognized
    final int[] a = off <= Integer.MAX_VALUE ? new int[] { (int) off } :
      new int[] { (int) (off >> 16 & 0xFFFFFF), -(int) (off & 0xFFFF) };
    insertNode(0, v, s, a);
  }

  // PRIVATE METHODS ==========================================================

  /**
   * Inserts a value into the entry of a node, shifting all following values
   * of the entry one position to the right. The enlarged entry replaces the
   * old one in {@link #next}.
   *
   * @param cn current node, whose entry is enlarged
   * @param ti id to insert
   * @param ip position inside the entry where {@code ti} is inserted
   */
  private void insertNode(final int cn, final int ti, final int ip) {
    final int[] cnn = next.get(cn);
    final int[] tmp = new int[cnn.length + 1];
    // values in front of the insertion position
    System.arraycopy(cnn, 0, tmp, 0, ip);
    // inserted value
    tmp[ip] = ti;
    // remaining values, moved by one position
    System.arraycopy(cnn, ip, tmp, ip + 1, cnn.length - ip);
    next.set(cn, tmp);
  }

  /**
   * Creates a node without children, registers its token and appends the
   * node to {@link #next}.
   *
   * @param tok token of the new node
   * @param s size of the data the node will have
   * @param off file offset (one or two integers) where to read the data
   * @return id of the new node in {@link #next}
   */
  private int addLeaf(final byte[] tok, final int s, final int[] off) {
    final int[] e = new int[2 + off.length];
    e[0] = tokens.size();
    tokens.add(tok);
    e[1] = s;
    System.arraycopy(off, 0, e, 2, off.length);
    next.add(e);
    return next.size() - 1;
  }

  /**
   * Inserts a node into the trie. Only the last child of a node is
   * considered when descending.
   *
   * @param cn current node, which gets a new node appended; start with root (0)
   * @param v value, which is to be inserted
   * @param s size of the data the node will have
   * @param off file offset (one or two integers) where to read the data
   * @return id of the entry that was appended last to {@link #next} (the new
   *   node, or the node taking over the remainder of a split token);
   *   {@code -1} if {@code v} equals the token of the current node, in which
   *   case nothing is inserted
   */
  private int insertNode(final int cn, final byte[] v, final int s,
      final int[] off) {
    final int[] cnn = next.get(cn);

    // root node that already has successors
    if(cn == 0 && cnn.length > 3) {
      final int p = pointer(cn);
      if(Token.diff(tokens.get(next.get(cnn[p])[0])[0], v[0]) != 0) {
        // the last child starts with a different letter:
        // create a new node and append it behind the last child
        final int id = addLeaf(v, s, off);
        insertNode(cn, id, p + 1);
        return id;
      }
      // same first letter: continue in the last child
      return insertNode(cnn[p], v, s, off);
    }

    // common prefix of the current token and the value to insert
    final byte[] is = cnn[0] == -1 ? null :
      intersection(tokens.get(cnn[0]), v);

    if(is == null) {
      // abort recursion: no intersection between the current node and the
      // value to insert; the new node is placed in front of the last two
      // values of the current entry
      final int id = addLeaf(v, s, off);
      insertNode(cn, id, cnn.length - 2);
      return id;
    }

    // remainders behind the common prefix (null if nothing remains)
    final byte[] ct = tokens.get(cnn[0]);
    final byte[] r1 = bytes(ct, is.length, ct.length);
    final byte[] r2 = bytes(v, is.length, v.length);

    if(r1 == null) {
      // current token and value to insert are identical: nothing to insert
      if(r2 == null) return -1;

      // current token is a prefix of the value to insert
      final int p = pointer(cn);
      if(p == 0 ||
          Token.diff(tokens.get(next.get(cnn[p])[0])[0], r2[0]) != 0) {
        // no children, or the last child starts with a different letter
        // than the remainder: create a new node and append it
        final int id = addLeaf(r2, s, off);
        insertNode(cn, id, p + 1);
        return id;
      }
      // same first letter: continue in the last child with the remainder
      return insertNode(cnn[p], r2, s, off);
    }

    if(r2 == null) {
      // value to insert is a prefix of the current token: the current node
      // keeps the prefix and takes the new data, the old entry is moved to
      // a new child holding the remainder of the token
      final int[] oe = new int[3 + off.length];
      tokens.set(cnn[0], is);
      oe[0] = cnn[0];
      oe[2] = s;
      System.arraycopy(off, 0, oe, 3, off.length);
      // the old entry is reused for the child and gets a new token
      cnn[0] = tokens.size();
      tokens.add(r1);
      next.add(cnn);
      oe[1] = next.size() - 1;
      next.set(cn, oe);
      return next.size() - 1;
    }

    // both values continue behind the common prefix: the current node keeps
    // the prefix and gets two children; its size and pointer stay 0
    tokens.set(cnn[0], is);
    final int[] pe = new int[5];
    pe[0] = cnn[0];
    // the node for r2 is appended first, the node for r1 second;
    // order the child pointers by the first letter of the remainders
    final int n = next.size();
    if(Token.diff(r2[0], r1[0]) < 0) {
      pe[1] = n;
      pe[2] = n + 1;
    } else {
      pe[1] = n + 1;
      pe[2] = n;
    }
    next.set(cn, pe);

    // new node for the remainder of the value to insert
    addLeaf(r2, s, off);

    // copy of the old entry, holding the remainder of the old token
    final int[] ce = new int[cnn.length];
    System.arraycopy(cnn, 0, ce, 0, ce.length);
    ce[0] = tokens.size();
    tokens.add(r1);
    next.add(ce);
    return next.size() - 1;
  }

  /**
   * Calculates the intersection, i.e. the common prefix of two arrays.
   * @param b1 input array one
   * @param b2 input array two
   * @return common prefix of b1 and b2, or {@code null} if an argument is
   *   {@code null} or if there is no common prefix
   */
  private static byte[] intersection(final byte[] b1, final byte[] b2) {
    if(b1 == null || b2 == null) return null;
    final int ml = Math.min(b1.length, b2.length);
    // find the first position at which the arrays differ
    int i = 0;
    while(i < ml && Token.diff(b1[i], b2[i]) == 0) i++;
    return bytes(b1, 0, i);
  }

  /**
   * Extracts all data from start to end position out of data.
   * @param d data to be copied
   * @param s start position (inclusive)
   * @param e end position (exclusive)
   * @return copied data, or {@code null} if {@code d} is {@code null}, the
   *   range is empty, {@code s} is negative or {@code e} exceeds the length
   *   of {@code d}
   */
  private static byte[] bytes(final byte[] d, final int s, final int e) {
    if(d == null || d.length < e || s < 0 || s == e) return null;
    final byte[] tmp = new byte[e - s];
    System.arraycopy(d, s, tmp, 0, tmp.length);
    return tmp;
  }

  /**
   * Returns the position of the last child pointer inside the entry of the
   * specified node. The entry ends with {@code s, p} if its last value is
   * not negative, and with {@code s, p0, p1} otherwise.
   * @param cn current node
   * @return position of the last child pointer; {@code 0} if the node has
   *   no children
   */
  private int pointer(final int cn) {
    final int[] nl = next.get(cn);
    return nl[nl.length - 1] >= 0 ? nl.length - 3 : nl.length - 4;
  }
}