package org.basex.index.value;
import static org.basex.core.Text.*;
import static org.basex.data.DataText.*;
import static org.basex.util.Token.*;
import java.io.IOException;
import org.basex.data.Data;
import org.basex.index.Index;
import org.basex.index.IndexCache;
import org.basex.index.IndexIterator;
import org.basex.index.IndexStats;
import org.basex.index.IndexToken;
import org.basex.index.RangeToken;
import org.basex.io.random.DataAccess;
import org.basex.util.Num;
import org.basex.util.Performance;
import org.basex.util.TokenBuilder;
import org.basex.util.hash.IntMap;
import org.basex.util.hash.TokenIntMap;
import org.basex.util.hash.TokenObjMap;
import org.basex.util.list.IntList;
/**
 * This class provides access to attribute values and text contents stored on
 * disk. The data structure is described in the {@link ValueBuilder} class.
 *
 * <p>Two files are accessed: a reference file ({@link #idxr}), which is read
 * as a sequence of 5-byte offsets (one per index entry), and a list file
 * ({@link #idxl}), in which each addressed entry starts with the number of
 * hits, followed by the first pre value and the distances to the subsequent
 * pre values.</p>
 *
 * <p>Both files are accessed via shared cursors; all public accessors are
 * therefore {@code synchronized} on this instance.</p>
 *
 * @author BaseX Team 2005-12, BSD License
 * @author Christian Gruen
 */
public class DiskValues implements Index {
  /** Number of index entries. */
  int size;
  /** ID references. */
  final DataAccess idxr;
  /** ID lists. */
  final DataAccess idxl;
  /** Value type (texts/attributes). */
  final boolean text;
  /** Data reference. */
  final Data data;
  /** Cached tokens. */
  final IndexCache cache = new IndexCache();
  /** Cached texts. Increases used memory, but speeds up repeated queries. */
  final IntMap<byte[]> ctext = new IntMap<byte[]>();

  /**
   * Constructor, initializing the index structure.
   * @param d data reference
   * @param txt value type (texts/attributes)
   * @throws IOException I/O Exception
   */
  public DiskValues(final Data d, final boolean txt) throws IOException {
    this(d, txt, txt ? DATATXT : DATAATV);
  }

  /**
   * Constructor, initializing the index structure.
   * @param d data reference
   * @param txt value type (texts/attributes)
   * @param pref file prefix
   * @throws IOException I/O Exception
   */
  DiskValues(final Data d, final boolean txt, final String pref)
      throws IOException {
    data = d;
    text = txt;
    idxl = new DataAccess(d.meta.dbfile(pref + 'l'));
    idxr = new DataAccess(d.meta.dbfile(pref + 'r'));
    // the list file starts with the number of index entries
    size = idxl.read4();
  }

  @Override
  public synchronized byte[] info() {
    final TokenBuilder tb = new TokenBuilder();
    tb.add(LI_STRUCTURE + SORTED_LIST + NL);
    final long l = idxl.length() + idxr.length();
    tb.add(LI_SIZE + Performance.format(l, true) + NL);
    final IndexStats stats = new IndexStats(data);
    for(int m = 0; m < size; ++m) {
      // offset of the m-th id list
      final long pos = idxr.read5(m * 5L);
      // number of hits of this entry
      final int oc = idxl.readNum(pos);
      if(stats.adding(oc)) stats.add(data.text(firstpre(pos), text));
    }
    stats.print(tb);
    return tb.finish();
  }

  @Override
  public synchronized IndexIterator iter(final IndexToken tok) {
    if(tok instanceof RangeToken) return idRange((RangeToken) tok);
    // use cached size and pointer, if available
    final int id = cache.id(tok.get());
    if(id > 0) return iter(cache.size(id), cache.pointer(id));
    final int ix = get(tok.get());
    if(ix < 0) return IndexIterator.EMPTY;
    final long pos = idxr.read5(ix * 5L);
    // after reading the number of hits, the cursor points to the first value
    return iter(idxl.readNum(pos), idxl.cursor());
  }

  @Override
  public synchronized int count(final IndexToken it) {
    if(it instanceof RangeToken) return idRange((RangeToken) it).size();
    final byte[] key = it.get();
    if(key.length > data.meta.maxlen) return Integer.MAX_VALUE;
    final int id = cache.id(key);
    if(id > 0) return cache.size(id);
    final int ix = get(key);
    if(ix < 0) return 0;
    // get position in heap file
    final long pos = idxr.read5(ix * 5L);
    // the first heap entry represents the number of hits
    final int nr = idxl.readNum(pos);
    cache.add(key, nr, pos + Num.length(nr));
    return nr;
  }

  /**
   * Returns all index entries starting with the specified prefix, along with
   * their number of hits. This method is synchronized, as it moves the file
   * cursors, updates the token cache and calls {@link #get(byte[])}, which
   * has to be invoked from within the monitor of this instance.
   */
  @Override
  public synchronized TokenIntMap entries(final byte[] prefix) {
    final TokenIntMap tim = new TokenIntMap();
    int ix = get(prefix);
    // prefix not found: start with the entry at the insertion point
    if(ix < 0) ix = -ix - 1;
    idxr.cursor(ix * 5L);
    for(; ix < size; ix++) {
      final long pos = idxr.read5();
      final int nr = idxl.readNum(pos);
      final int pre = idxl.readNum();
      final byte[] key = data.text(pre, text);
      cache.add(key, nr, pos + Num.length(nr));
      // entries are scanned in index order; stop at the first mismatch
      if(!startsWith(key, prefix)) break;
      tim.add(key, nr);
    }
    return tim;
  }

  /**
   * Returns next values. Called by the {@link ValueBuilder}.
   * @return compressed values, or an empty array if the cursor of the
   *   reference file has reached the end of the file
   */
  byte[] nextValues() {
    return idxr.cursor() >= idxr.length() ? EMPTY : idxl.readBytes(
        idxr.read5(), idxl.read4());
  }

  /**
   * Iterator method. Reads the specified number of values from the list file
   * and sums them up, yielding the resulting pre values.
   * @param s number of values
   * @param ps offset
   * @return iterator
   */
  IndexIterator iter(final int s, final long ps) {
    final IntList pres = new IntList(s);
    long p = ps;
    for(int l = 0, v = 0; l < s; ++l) {
      // each stored number is the distance to the previous value
      v += idxl.readNum(p);
      p = idxl.cursor();
      pres.add(v);
    }
    return iter(pres);
  }

  /**
   * Performs a range query. All index values must be numeric.
   * @param tok index term
   * @return results
   */
  IndexIterator idRange(final RangeToken tok) {
    final double min = tok.min;
    final double max = tok.max;

    // check if min and max are positive integers with the same number of digits
    final int len = max > 0 && (long) max == max ? token(max).length : 0;
    final boolean simple = len != 0 && min > 0 && (long) min == min
        && token(min).length == len;

    final IntList pres = new IntList();
    for(int l = 0; l < size; ++l) {
      final int ds = idxl.readNum(idxr.read5(l * 5L));
      int pre = idxl.readNum();
      final double v = data.textDbl(pre, text);

      if(v >= min && v <= max) {
        // value is in range
        for(int d = 0; d < ds; ++d) {
          pres.add(pre);
          // a list with ds hits stores ds - 1 distances after the first pre
          // value; do not read beyond the end of the list
          if(d + 1 < ds) pre += idxl.readNum();
        }
      } else if(simple && v > max && data.textLen(pre, text) == len) {
        // if limits are integers, if min, max and current value have the same
        // string length, and if current value is larger than max, test can be
        // skipped, as all remaining values will be bigger
        break;
      }
    }
    return iter(pres.sort());
  }

  /**
   * Returns an iterator for the specified id list.
   * @param ids id list
   * @return iterator
   */
  static IndexIterator iter(final IntList ids) {
    return new IndexIterator() {
      final int s = ids.size();
      int p = -1;

      @Override
      public boolean more() {
        return ++p < s;
      }
      @Override
      public int next() {
        return ids.get(p);
      }
      @Override
      public double score() {
        return -1;
      }
      @Override
      public int size() {
        return s;
      }
    };
  }

  /**
   * Get the first pre value from the id-list at the specified position.
   * @param pos position of the id-list in {@link #idxl}
   * @return pre value
   */
  int firstpre(final long pos) {
    // read the number of ids in the list
    idxl.readNum(pos);
    return idxl.readNum();
  }

  /**
   * Binary search for key in the {@link #idxr}.
   * <em>Important:</em> This method has to be called while being in the monitor
   * of this instance, e.g. from a {@code synchronized}
   * method.
   * @param key token to be found
   * @return index of the key if it is found; otherwise
   *   {@code -(insertion point) - 1}
   */
  int get(final byte[] key) {
    return get(key, 0, size - 1);
  }

  /**
   * Binary search for key in the {@link #idxr}.
   * <em>Important:</em> This method has to be called while being in the monitor
   * of this instance, e.g. from a {@code synchronized}
   * method.
   * @param key token to be found
   * @param first begin of the search interval
   * @param last end of the search interval
   * @return index of the key if it is found; otherwise
   *   {@code -(insertion point) - 1}
   */
  int get(final byte[] key, final int first, final int last) {
    int l = first, h = last;
    while(l <= h) {
      // unsigned shift avoids a negative result if the sum overflows
      final int m = l + h >>> 1;
      byte[] txt = ctext.get(m);
      if(txt == null) {
        // text not cached yet: retrieve it via the first pre value of the entry
        txt = data.text(firstpre(idxr.read5(m * 5L)), text);
        ctext.add(m, txt);
      }
      final int d = diff(txt, key);
      if(d == 0) return m;
      if(d < 0) l = m + 1;
      else h = m - 1;
    }
    return -(l + 1);
  }

  /**
   * Flushes the buffered data.
   * @throws IOException I/O exception
   */
  public void flush() throws IOException {
    idxl.flush();
    idxr.flush();
  }

  @Override
  public synchronized void close() throws IOException {
    // ensure that the reference file is closed even if closing the
    // list file fails
    try {
      idxl.close();
    } finally {
      idxr.close();
    }
  }

  /**
   * Add entries to the index. This implementation does nothing.
   * @param m a set of <key, id-list> pairs
   */
  @SuppressWarnings("unused")
  public void index(final TokenObjMap<IntList> m) { }

  /**
   * Delete records from the index. This implementation does nothing.
   * @param m a set of <key, id-list> pairs
   */
  @SuppressWarnings("unused")
  public void delete(final TokenObjMap<IntList> m) { }

  /**
   * Replace a record in the index. This implementation does nothing.
   * @param o old record key
   * @param n new record key
   * @param id record id
   */
  @SuppressWarnings("unused")
  public void replace(final byte[] o, final byte[] n, final int id) { }
}