package org.basex.index;
import static org.basex.util.Token.*;
import java.io.IOException;
import org.basex.core.Prop;
import org.basex.data.Data;
import org.basex.data.MetaData;
import org.basex.io.in.DataInput;
import org.basex.io.out.DataOutput;
import org.basex.util.Array;
import org.basex.util.hash.TokenBoolMap;
import org.basex.util.list.IntList;
import org.basex.util.list.TokenList;
/**
* <p>This data structure contains references to all document nodes in a
* database. The document nodes are incrementally updated.</p>
*
* <p>If updates are performed, the path order is discarded, as its continuous
* update would be more expensive in some cases (e.g. when bulk insertions of
* new documents are performed). A tree structure could be introduced to
* offer better general performance.</p>
*
* @author BaseX Team 2005-12, BSD License
* @author Christian Gruen
* @author Lukas Kircher
*/
final class Docs {
/** Data reference. */
private final Data data;
/** Pre values of document nodes (can be {@code null}).
* This variable should always be requested via {@link #docs()}. */
private IntList docList;
/** Sorted document paths (can be {@code null}).
* This variable should always be requested via {@link #paths()}. */
private TokenList pathList;
/** Ordered path indexes (can be {@code null}).
* This variable should always be requested via {@link #order()}. */
private int[] pathOrder;
/**
* Constructor.
* @param d data reference
*/
Docs(final Data d) {
data = d;
}
/**
* Reads the document index.
* @param in input stream
* @throws IOException I/O exception
*/
synchronized void read(final DataInput in) throws IOException {
docList = in.readDiffs();
}
/**
* Writes the document index.
* @param out output stream
* @throws IOException I/O exception
*/
void write(final DataOutput out) throws IOException {
out.writeDiffs(docs());
}
/**
* Initializes the document index.
*/
synchronized void init() {
docList = null;
pathList = null;
docs();
}
/**
* Returns the {@code pre} values of all document nodes.
* A single dummy node is returned if the database is empty.
* @return document nodes
*/
synchronized IntList docs() {
if(docList == null) {
final IntList il = new IntList();
final int is = data.meta.size;
for(int i = 0; i < is;) {
final int k = data.kind(i);
if(k == Data.DOC) il.add(i);
i += data.size(i, k);
}
data.meta.dirty = true;
docList = il;
}
return docList;
}
/**
* Returns the document paths, and initializes them if necessary.
* @return document paths
*/
private synchronized TokenList paths() {
if(pathList == null) {
final IntList docs = docs();
final int ds = docs.size();
final TokenList paths = new TokenList(ds);
for(int d = 0; d < ds; d++) {
paths.add(normalize(data.text(docs.get(d), true)));
}
pathList = paths;
}
return pathList;
}
/**
* Returns the document path order, and initialize the array if necessary.
* @return path order
*/
private synchronized int[] order() {
if(pathOrder == null) {
pathOrder = Array.createOrder(paths().toArray(), false, true);
}
return pathOrder;
}
/**
* Adds entries to the index and updates subsequent nodes.
* @param pre insertion position
* @param d data reference to be inserted
*/
void insert(final int pre, final Data d) {
// find all document nodes in the given data instance
final int dsize = d.meta.size;
final IntList pres = new IntList();
for(int dpre = 0; dpre < dsize;) {
final int k = d.kind(dpre);
if(k == Data.DOC) pres.add(pre + dpre);
dpre += d.size(dpre, k);
}
// insert DOC nodes and move pre values of following DOC nodes
final int[] presA = pres.toArray();
final IntList docs = docs();
final TokenList paths = paths();
int i = docs.sortedIndexOf(pre);
if(i < 0) i = -i - 1;
docs.insert(i, presA);
docs.move(dsize, i + pres.size());
final byte[][] t = new byte[presA.length][];
for(int j = 0; j < t.length; j++) {
// subtract pre to retrieve paths from given data instance
t[j] = normalize(d.text(presA[j] - pre, true));
}
paths.insert(i, t);
pathOrder = null;
}
/**
* Deletes the specified entry and updates subsequent nodes.
* @param pre pre value
* @param size number of deleted nodes
*/
void delete(final int pre, final int size) {
final IntList docs = docs();
final TokenList paths = paths();
int i = docs.sortedIndexOf(pre);
final boolean found = i >= 0;
if(i < 0) i = -i - 1;
else docs.delete(i);
docs.move(-size, i);
if(!found) return;
paths.delete(i);
pathOrder = null;
}
/**
* Updates the index after a document has been renamed.
* @param pre pre value of updated document
* @param value new name
*/
void rename(final int pre, final byte[] value) {
final IntList docs = docs();
final TokenList paths = paths();
paths.set(docs.sortedIndexOf(pre), normalize(value));
pathOrder = null;
}
/**
* Replaces entries in the index.
* @param pre insertion position
* @param size number of deleted nodes
* @param d data reference to be copied
*/
void replace(final int pre, final int size, final Data d) {
delete(pre, size);
insert(pre, d);
}
/**
* Returns the pre values of all document nodes matching the specified path.
* Exact || prefix match!
* @param path input path
* @return root nodes
*/
synchronized IntList docs(final String path) {
// invalid path, or no documents: return empty list
final String pth = MetaData.normPath(path);
if(pth == null || data.empty()) return new IntList(0);
// empty path: return all documents
final IntList docs = docs();
if(pth.isEmpty()) return docs;
// normalize paths
byte[] exct = EMPTY;
byte[] pref = normalize(token(pth));
// check for explicit directory indicator
if(!pth.endsWith("/")) {
exct = pref;
pref = concat(exct, SLASH);
}
// relevant paths: exact hits and prefixes
final IntList il = new IntList();
/* could be optimized for future access by sorting the paths first
* and then accessing only the relevant paths. Sorting might slow down
* bulk operations like insert/delete/replace though. */
final TokenList paths = paths();
for(int p = 0; p < paths.size(); p++) {
final byte[] b = paths.get(p);
if(eq(b, exct) || startsWith(b, pref)) il.add(docs.get(p));
}
return il.sort();
}
/**
* Returns the pre value of the document node matching the specified path.
* Exact match! Document paths can be sorted for faster future access or
* sorting can be disabled as it slows down bulk inserts/deletes/replaces.
* @param path input path
* @param sort sort paths before access
* @return root nodes
*/
synchronized int doc(final String path, final boolean sort) {
// invalid or empty path, or no documents: return -1
final String pth = MetaData.normPath(path);
if(pth == null || pth.isEmpty() || data.empty()) return -1;
// normalize paths
final byte[] exct = normalize(token(pth));
// relevant paths: exact match
final IntList docs = docs();
final TokenList paths = paths();
final int ts = paths.size();
if(sort) {
final int[] order = order();
final int p = find(exct);
return p > -1 && p < ts && eq(paths.get(order[p]), exct) ?
docs.get(order[p]) : -1;
}
for(int t = 0; t < ts; t++) {
if(eq(paths.get(t), exct)) return docs.get(t);
}
return -1;
}
/**
* Determines whether the given path is the path to a document directory.
* @param path given path (must be normalized, means one leading but
* no trailing slash.
* @return path to a directory or not
*/
synchronized boolean isDir(final byte[] path) {
if(path == null || data.empty()) return false;
final byte[] pa = concat(path, SLASH);
for(final byte[] b : paths()) if(startsWith(b, pa)) return true;
return false;
}
/**
* Adds the database paths for the child documents of the given path to
* the given map.
* @param path path
* @param dir returns directories instead of files
* @param tbm map; values will be {@code false} to indicate documents
*/
synchronized void children(final byte[] path,
final boolean dir, final TokenBoolMap tbm) {
final String pth = MetaData.normPath(string(path));
if(pth != null && !data.empty()) {
// normalize path to one leading + one trailing slash!
byte[] tp = concat(SLASH, token(pth));
// if the given path is the root, don't add a trailing slash
if(!pth.isEmpty()) tp = concat(tp, SLASH);
for(final byte[] to : paths()) {
if(startsWith(to, tp)) {
final byte[] toAdd = substring(to, tp.length, to.length);
final int i = indexOf(toAdd, SLASH);
// no more slashes means this must be a leaf
if(!dir && i == -1) tbm.add(toAdd, false);
else if(dir && i >= 0) tbm.add(substring(toAdd, 0, i), false);
}
}
}
}
/**
* Returns the first position matching the specified path
* (might equal the array size).
* @param v value to be found
* @return position
*/
private int find(final byte[] v) {
// binary search
final TokenList paths = paths();
final int[] po = pathOrder;
int l = 0, h = po.length - 1;
while(l <= h) {
int m = l + h >>> 1;
final int c = diff(paths.get(po[m]), v);
if(c == 0) {
// find first entry
while(m > 0 && eq(paths.get(po[m - 1]), v)) --m;
return m;
}
if(c < 0) l = m + 1;
else h = m - 1;
}
return l;
}
/**
* Returns the normalized index path representation for the specified path.
* @param path input path (without leading slash)
* @return canonical path
*/
private static byte[] normalize(final byte[] path) {
return concat(SLASH, Prop.WIN ? lc(path) : path);
}
}