/*
* Copyright 2009-2016 Tilmann Zaeschke. All rights reserved.
*
* This file is part of ZooDB.
*
* ZooDB is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* ZooDB is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with ZooDB. If not, see <http://www.gnu.org/licenses/>.
*
* See the README and COPYING files for further information.
*/
package org.zoodb.internal.server.index;
import java.io.IOException;
import java.lang.reflect.Type;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import org.zoodb.api.impl.ZooPC;
import org.zoodb.internal.Node;
import org.zoodb.internal.PersistentSchemaOperation;
import org.zoodb.internal.ZooClassDef;
import org.zoodb.internal.ZooClassProxy;
import org.zoodb.internal.ZooFieldDef;
import org.zoodb.internal.server.CallbackPageRead;
import org.zoodb.internal.server.CallbackPageWrite;
import org.zoodb.internal.server.DiskAccessOneFile;
import org.zoodb.internal.server.StorageChannel;
import org.zoodb.internal.server.StorageChannelInput;
import org.zoodb.internal.server.StorageChannelOutput;
import org.zoodb.internal.server.DiskIO.PAGE_TYPE;
import org.zoodb.internal.server.index.PagedPosIndex.ObjectPosIteratorMerger;
import org.zoodb.internal.util.DBLogger;
import org.zoodb.internal.util.PrimLongMapLI;
import org.zoodb.internal.util.Util;
/**
* Schema Index. This class manages the indices in the database. The indices are stored separately
* from the schemata. Since schemas are not objects, they are referenced only by pageId, which
* changes every time that an index changes. To avoid rewriting all schemata every time the indices
* change, this class was introduced as a compressed version of the schemata. This should avoid
* unnecessary page writes for rewriting the schemata.
*
* Structure
* =========
* For each schema, we store a list of indices for all fields that are indexed. This list is
* compatible only with the latest version of the schema. Field-indices from older versions are
* removed (or moved to the latest version, if they still exist).
*
* The pos-indices are different. We have one pos-index for each version of a schema. This is
* necessary in the case that a field+index are added. A query that matches the default value
* of the new field should also return all objects that have not been evolved yet (lazy evolution).
* This is only possible if we maintain a list of objects separately for each applicable schema
* version.
*
*
* @author ztilmann
*
*/
public class SchemaIndex implements CallbackPageRead, CallbackPageWrite {
//This maps the schemaId (not the OID!) to the SchemaIndexEntry
private final PrimLongMapLI<SchemaIndexEntry> schemaIndex =
new PrimLongMapLI<SchemaIndexEntry>();
//Page of the index directory. Set by the constructor and revert(), updated by write().
private int pageId = -1;
//Storage channel that is used to create/load the indices and to report free pages
private final StorageChannel file;
//Writer for the index directory, obtained from 'file' in the constructor
private final StorageChannelOutput out;
//Reader for the index directory, obtained from 'file' in the constructor
private final StorageChannelInput in;
//If 'true', write() rewrites the index directory
private boolean isDirty = false;
//Pages that were reported through the overflow callbacks (notifyOverflowRead/Write).
//write() reports them as free pages.
private final ArrayList<Integer> pageIDs = new ArrayList<Integer>();
//updates that require re-opening the database connection
private boolean isResetRequired = false;
//Transaction ID recorded by write() when 'isResetRequired' was set; also set by revert()
private long txIdOfLastWrite = -1;
//updates that can be solved with refresh
private boolean isRefreshRequired = false;
//Transaction ID recorded by write() when 'isRefreshRequired' was set; also set by revert()
private long txIdOfLastWriteThatRequiresRefresh = -1;
/**
 * Directory record for the index of a single field. All attributes except 'index' are
 * stored in the index directory, see SchemaIndexEntry.write().
 */
private static class FieldIndex {
//This is the unique fieldId which is maintained throughout different versions of the field
private long fieldId;
//'true' if the index was defined as unique
private boolean isUnique;
//Type of the indexed field; the ordinal is what gets stored in the directory
private FTYPE fType;
//Page of the index, as returned by the last index.write() or as read from the directory
private int page;
//The index itself. This is 'null' for entries that were read from the directory until
//the index is loaded, see SchemaIndexEntry.getIndex().
private LongLongIndex index;
}
/**
 * Types of fields that can be indexed.
 *
 * The order of the constants must not be changed, because the ordinal of a constant is
 * written to the index directory and used to look the constant up again when reading.
 */
public static enum FTYPE {
    LONG(8, Long.TYPE, "long"),
    INT(4, Integer.TYPE, "int"),
    SHORT(2, Short.TYPE, "short"),
    BYTE(1, Byte.TYPE, "byte"),
    DOUBLE(8, Double.TYPE, "double"),
    FLOAT(4, Float.TYPE, "float"),
    CHAR(2, Character.TYPE, "char"),
    STRING(8, null, "java.lang.String"),
    REF(8, Long.TYPE, ZooPC.class.getName());

    /** Name of the field type that maps to this constant. */
    private final String typeName;

    /**
     * @param len Currently not stored.
     * @param type Currently not stored.
     * @param typeName Type name that is matched in {@link #fromType(ZooFieldDef)}.
     */
    private FTYPE(int len, Type type, String typeName) {
        this.typeName = typeName;
    }

    /**
     * @param fieldType The field definition
     * @return REF for persistent types, otherwise the constant whose type name equals the
     * type name of the field.
     * @throws IllegalArgumentException if no constant matches the type name of the field.
     */
    public static FTYPE fromType(ZooFieldDef fieldType) {
        if (fieldType.isPersistentType()) {
            return REF;
        }
        final String name = fieldType.getTypeName();
        final FTYPE[] candidates = values();
        for (int i = 0; i < candidates.length; i++) {
            if (candidates[i].typeName.equals(name)) {
                return candidates[i];
            }
        }
        throw new IllegalArgumentException("Type is not indexable: " + name);
    }
}
/**
* Do not store classes here. On the server, the class may not be available.
*
* Otherwise it would be nice, because comparison of references to classes is faster than
* Strings, and references require much less space. Then again, there are few schema classes,
* so space is not a problem.
*/
public class SchemaIndexEntry {
//ID of the schema, this is the key in the enclosing schemaIndex map
private final long schemaId;
//OIDs of the schema versions, one entry per version
private long[] schemaOids;
//Do not store classes here! See above.
//We also do not store the class name, as it uses a lot of space, especially since
//we do not return pages to FSM except the last one.
//Pages of the pos-indices, one entry per schema version
private int[] objIndexPages;
//Pos-indices, one per schema version. Elements are 'null' until the index is
//created or lazily loaded from the page in objIndexPages.
private transient PagedPosIndex[] objIndex;
//Field indices of this schema
private ArrayList<FieldIndex> fieldIndices = new ArrayList<FieldIndex>();
/**
 * Constructor for reading an index entry from the directory.
 *
 * The entry is read in the same order in which write() stores it: schema ID, number of
 * versions, schema OIDs, pos-index pages, number of field indices and then the field
 * indices. Neither the pos-indices nor the field indices are loaded here.
 *
 * @param in The input to read from
 */
private SchemaIndexEntry(StorageChannelInput in) {
    this.schemaId = in.readLong();

    final int versionCount = in.readShort();
    this.schemaOids = new long[versionCount];
    for (int v = 0; v < versionCount; v++) {
        this.schemaOids[v] = in.readLong();
    }
    this.objIndexPages = new int[versionCount];
    for (int v = 0; v < versionCount; v++) {
        this.objIndexPages[v] = in.readInt();
    }
    //pos-indices are loaded lazily
    this.objIndex = new PagedPosIndex[versionCount];

    final int fieldIndexCount = in.readShort();
    final FTYPE[] types = FTYPE.values();
    for (int f = 0; f < fieldIndexCount; f++) {
        FieldIndex record = new FieldIndex();
        record.fieldId = in.readLong();
        record.fType = types[in.readByte()];
        record.isUnique = in.readBoolean();
        record.page = in.readInt();
        this.fieldIndices.add(record);
    }
}
/**
 * Constructor for creating a new index entry with a single schema version.
 *
 * @param file The storage channel in which the new pos-index is created
 * @param def The class definition that provides the schema ID and the OID of the
 * first schema version
 */
private SchemaIndexEntry(StorageChannel file, ZooClassDef def) {
    schemaId = def.getSchemaId();
    schemaOids = new long[] { def.getOid() };
    objIndex = new PagedPosIndex[] { PagedPosIndex.newIndex(file) };
    //The page of the new pos-index is assigned when the index is written
    objIndexPages = new int[1];
}
/**
 * Writes this entry to the directory: schema ID, number of versions, schema OIDs,
 * pos-index pages, number of field indices and then for each field index the
 * field ID, type ordinal, unique flag and page.
 *
 * @param out The output to write to
 */
private void write(StorageChannelOutput out) {
    final int versionCount = schemaOids.length;
    out.writeLong(schemaId);
    out.writeShort((short) versionCount);
    for (int v = 0; v < versionCount; v++) {
        out.writeLong(schemaOids[v]);
    }
    for (int v = 0; v < objIndexPages.length; v++) {
        out.writeInt(objIndexPages[v]);
    }

    final int fieldIndexCount = fieldIndices.size();
    out.writeShort((short) fieldIndexCount);
    for (int f = 0; f < fieldIndexCount; f++) {
        final FieldIndex record = fieldIndices.get(f);
        out.writeLong(record.fieldId);
        out.writeByte((byte) record.fType.ordinal());
        out.writeBoolean(record.isUnique);
        out.writeInt(record.page);
    }
}
/**
 * Returns the pos-index of the last schema version. The index is loaded from its page
 * if it is not in memory yet.
 *
 * @return The pos-index for the latest schema version
 */
public PagedPosIndex getObjectIndexLatestSchemaVersion() {
    final int latest = objIndex.length - 1;
    PagedPosIndex posIndex = objIndex[latest];
    if (posIndex == null) {
        // lazy loading
        posIndex = PagedPosIndex.loadIndex(file, objIndexPages[latest]);
        objIndex[latest] = posIndex;
    }
    return posIndex;
}
/**
 * Creates a merged iterator over the pos-indices of all schema versions. Pos-indices
 * that are not in memory yet are loaded from their pages.
 *
 * @return Merger that contains the object iterators of all schema versions
 */
public ObjectPosIteratorMerger getObjectIndexIterator() {
    final ObjectPosIteratorMerger merger = new ObjectPosIteratorMerger();
    int version = 0;
    while (version < objIndex.length) {
        PagedPosIndex posIndex = objIndex[version];
        if (posIndex == null) {
            // lazy loading
            posIndex = PagedPosIndex.loadIndex(file, objIndexPages[version]);
            objIndex[version] = posIndex;
        }
        merger.add(posIndex.iteratorObjects());
        version++;
    }
    return merger;
}
/**
 * Defines a new index on the given field. The field is flagged as indexed and the
 * enclosing SchemaIndex is marked as 'refresh required'.
 *
 * @param field The field to index. It must be a primitive, a String or a persistent type.
 * @param isUnique Whether the index is unique
 * @return The new index
 * @throws IllegalArgumentException if the type cannot be indexed or if an index for
 * the field is already defined.
 */
public LongLongIndex defineIndex(ZooFieldDef field, boolean isUnique) {
    //double check
    if (!(field.isPrimitiveType() || field.isString() || field.isPersistentType())) {
        throw new IllegalArgumentException("Type cannot be indexed: " + field.getTypeName());
    }
    for (int i = 0; i < fieldIndices.size(); i++) {
        if (fieldIndices.get(i).fieldId == field.getFieldSchemaId()) {
            throw new IllegalArgumentException(
                    "Index is already defined: " + field.getName());
        }
    }

    final FieldIndex created = new FieldIndex();
    created.fieldId = field.getFieldSchemaId();
    created.fType = FTYPE.fromType(field);
    created.isUnique = isUnique;
    field.setIndexed(true);
    field.setUnique(isUnique);

    //unique String indexes use a non-unique index!
    final boolean useUniqueIndex = isUnique && !field.isString();
    if (useUniqueIndex) {
        created.index = IndexFactory.createUniqueIndex(PAGE_TYPE.FIELD_INDEX, file);
    } else {
        created.index = IndexFactory.createIndex(PAGE_TYPE.FIELD_INDEX, file);
    }

    fieldIndices.add(created);
    markRefreshRequired();
    return created.index;
}
/**
 * Removes the index of the given field. The index is cleared, the field is flagged as
 * not indexed and the enclosing SchemaIndex is marked as 'refresh required'.
 *
 * Indices are loaded lazily. An index that was read from the directory but has not been
 * loaded yet is loaded here before it is cleared, instead of failing with a
 * NullPointerException.
 *
 * @param field The field whose index should be removed
 * @return 'true' if an index was found and removed, otherwise 'false'
 */
public boolean removeIndex(ZooFieldDef field) {
    Iterator<FieldIndex> iter = fieldIndices.iterator();
    while (iter.hasNext()) {
        FieldIndex fi = iter.next();
        if (fi.fieldId != field.getFieldSchemaId()) {
            continue;
        }
        if (fi.index == null) {
            //lazy loading, same rule as in getIndex()
            if (fi.isUnique && !field.isString()) {
                fi.index = IndexFactory.loadUniqueIndex(PAGE_TYPE.FIELD_INDEX, file, fi.page);
            } else {
                fi.index = IndexFactory.loadIndex(PAGE_TYPE.FIELD_INDEX, file, fi.page);
            }
        }
        iter.remove();
        fi.index.clear();
        field.setIndexed(false);
        markRefreshRequired();
        return true;
    }
    return false;
}
/**
 * Returns the index of the given field. The index is loaded from its page if it is not
 * in memory yet.
 *
 * @param field The field definition
 * @return The index of the field or 'null' if no index is defined for the field.
 */
public LongLongIndex getIndex(ZooFieldDef field) {
    for (int i = 0; i < fieldIndices.size(); i++) {
        final FieldIndex candidate = fieldIndices.get(i);
        if (candidate.fieldId != field.getFieldSchemaId()) {
            continue;
        }
        if (candidate.index != null) {
            return candidate.index;
        }
        //lazy loading; unique String indexes use a non-unique index
        if (candidate.isUnique && !field.isString()) {
            candidate.index =
                    IndexFactory.loadUniqueIndex(PAGE_TYPE.FIELD_INDEX, file, candidate.page);
        } else {
            candidate.index =
                    IndexFactory.loadIndex(PAGE_TYPE.FIELD_INDEX, file, candidate.page);
        }
        return candidate.index;
    }
    return null;
}
/**
 * Returns the field indices of this schema as they are currently held in memory. This
 * method does not load indices, the list contains 'null' for every index that has not
 * been loaded yet.
 *
 * @return New list with one element per field index
 */
public ArrayList<LongLongIndex> getIndices() {
    final ArrayList<LongLongIndex> result =
            new ArrayList<LongLongIndex>(fieldIndices.size());
    for (int i = 0; i < fieldIndices.size(); i++) {
        result.add(fieldIndices.get(i).index);
    }
    return result;
}
/**
 * Clears all field indices of this schema. The index definitions themselves are kept.
 *
 * Indices are loaded lazily. An index that has not been loaded yet is loaded here
 * before it is cleared, instead of failing with a NullPointerException.
 *
 * @return Always an empty list. This method has never added anything to the returned
 * list; the return type is kept for compatibility with existing callers.
 */
public ArrayList<AbstractPagedIndex> clearIndices() {
    for (FieldIndex fi: fieldIndices) {
        if (fi.index == null) {
            //Lazy loading. There is no field definition available here, so the kind of
            //index is derived from the stored type instead of field.isString().
            //NOTE(review): this assumes that FTYPE.STRING is used exactly for fields
            //for which isString() returns 'true' -- please confirm.
            if (fi.isUnique && fi.fType != FTYPE.STRING) {
                fi.index = IndexFactory.loadUniqueIndex(PAGE_TYPE.FIELD_INDEX, file, fi.page);
            } else {
                fi.index = IndexFactory.loadIndex(PAGE_TYPE.FIELD_INDEX, file, fi.page);
            }
        }
        fi.index.clear();
    }
    return new ArrayList<AbstractPagedIndex>();
}
/**
 * @param field The field definition
 * @return Whether the index of the given field was defined as unique
 * @throws IllegalArgumentException if no index is defined for the field.
 */
public boolean isUnique(ZooFieldDef field) {
    for (int i = 0; i < fieldIndices.size(); i++) {
        final FieldIndex candidate = fieldIndices.get(i);
        if (candidate.fieldId == field.getFieldSchemaId()) {
            return candidate.isUnique;
        }
    }
    throw new IllegalArgumentException("Index not found for " + field.getName());
}
/**
 * Writes all field indices that are loaded and dirty and remembers their new pages.
 * Indices that are not loaded are skipped.
 *
 * @return True if any indices were written.
 */
private boolean writeAttrIndices() {
    boolean anyWritten = false;
    for (int i = 0; i < fieldIndices.size(); i++) {
        final FieldIndex record = fieldIndices.get(i);
        //is index loaded?
        if (record.index == null || !record.index.isDirty()) {
            continue;
        }
        record.page = record.index.write();
        anyWritten = true;
    }
    return anyWritten;
}
/**
 * Adds a new schema version to this entry. The arrays for schema OIDs, pos-index pages
 * and pos-indices are resized to 'schema version + 1' and a new pos-index is created for
 * the new version. Afterwards the field indices are updated according to the evolution
 * operations of the new version.
 *
 * Field indices are loaded lazily. An index that has to be removed but has not been
 * loaded yet is loaded here before it is cleared, instead of failing with a
 * NullPointerException.
 *
 * @param defNew The class definition of the new schema version
 */
void addVersion(ZooClassDef defNew) {
    int newLen = defNew.getSchemaVersion() + 1;
    schemaOids = Arrays.copyOf(schemaOids, newLen);
    objIndexPages = Arrays.copyOf(objIndexPages, newLen);
    objIndex = Arrays.copyOf(objIndex, newLen);
    objIndex[newLen-1] = PagedPosIndex.newIndex(file);
    schemaOids[newLen-1] = defNew.getOid();

    for (PersistentSchemaOperation op: defNew.getEvolutionOps()) {
        if (op.isAddOp() && op.getField().isIndexed()) {
            //create indexes for added fields that are indexed
            ZooFieldDef field = op.getField();
            FieldIndex fi = new FieldIndex();
            fi.fieldId = op.getFieldId();
            fi.fType = FTYPE.fromType(field);
            fi.isUnique = field.isIndexUnique();
            //unique String indexes use a non-unique index
            if (fi.isUnique && !field.isString()) {
                fi.index = IndexFactory.createUniqueIndex(PAGE_TYPE.FIELD_INDEX, file);
            } else {
                fi.index = IndexFactory.createIndex(PAGE_TYPE.FIELD_INDEX, file);
            }
            fieldIndices.add(fi);
        } else {
            //remove indexes for deleted fields
            //An iterator is used so that no element is skipped after a removal.
            Iterator<FieldIndex> iter = fieldIndices.iterator();
            while (iter.hasNext()) {
                FieldIndex fi = iter.next();
                if (fi.fieldId != op.getFieldId()) {
                    continue;
                }
                if (fi.index == null) {
                    //Lazy loading, the kind of index is derived from the stored type.
                    //NOTE(review): this assumes that FTYPE.STRING is used exactly for
                    //fields for which isString() returns 'true' -- please confirm.
                    if (fi.isUnique && fi.fType != FTYPE.STRING) {
                        fi.index = IndexFactory.loadUniqueIndex(
                                PAGE_TYPE.FIELD_INDEX, file, fi.page);
                    } else {
                        fi.index = IndexFactory.loadIndex(
                                PAGE_TYPE.FIELD_INDEX, file, fi.page);
                    }
                }
                iter.remove();
                fi.index.clear();
            }
        }
    }
}
/**
 * Returns the pos-index of the given schema version. The index is loaded from its page
 * if it is not in memory yet.
 *
 * @param version Position of the schema version in this entry
 * @return The pos-index for the given schema version
 */
public PagedPosIndex getObjectIndexVersion(int version) {
    PagedPosIndex posIndex = objIndex[version];
    if (posIndex == null) {
        // lazy loading
        posIndex = PagedPosIndex.loadIndex(file, objIndexPages[version]);
        objIndex[version] = posIndex;
    }
    return posIndex;
}
/**
 * @return The number of pos-indices in this entry; there is one per schema version.
 */
public int getObjectIndexVersionCount() {
    final int versionCount = objIndex.length;
    return versionCount;
}
}
/**
 * Creates the schema index and, for an existing database, reads the index directory.
 *
 * @param file The storage channel. A reader and a writer are obtained from it.
 * @param indexPage1 The page of the index directory
 * @param isNew 'true' for a new index. In this case nothing is read and the index
 * starts out dirty, so that it gets written by write().
 */
public SchemaIndex(StorageChannel file, int indexPage1, boolean isNew) {
    this.file = file;
    this.pageId = indexPage1;
    this.isDirty = isNew;
    this.in = file.getReader(true);
    this.out = file.getWriter(true);
    if (isNew == false) {
        readIndex();
    }
    //The overflow callbacks are registered only after the initial read.
    this.in.setOverflowCallbackRead(this);
    this.out.setOverflowCallbackWrite(this);
}
/**
 * Reads the index directory from the page 'pageId': first the number of entries and
 * then the entries, which are registered under their schema ID.
 */
private void readIndex() {
    in.seekPageForRead(PAGE_TYPE.SCHEMA_INDEX, pageId);
    int remaining = in.readInt();
    while (remaining > 0) {
        final SchemaIndexEntry loaded = new SchemaIndexEntry(in);
        schemaIndex.put(loaded.schemaId, loaded);
        remaining--;
    }
}
/**
 * Writes all loaded pos-indices and all dirty field indices. If this changed any index
 * page or if the schema index is otherwise dirty, the index directory is rewritten.
 *
 * Pages that were recorded through the overflow callbacks are reported as free only if
 * the directory is actually rewritten. If nothing is rewritten, these pages remain
 * recorded and are reported by the next call that rewrites the directory.
 *
 * @param txId The ID of the current transaction. It is recorded as the ID of the last
 * write that requires a reset and/or refresh if the respective flag is set.
 * @return The page of the index directory. This is the previous page if nothing had to
 * be written.
 */
public int write(long txId) {
    //write the indices
    for (SchemaIndexEntry e: schemaIndex.values()) {
        for (int i = 0; i < e.objIndex.length; i++) {
            PagedPosIndex oi = e.objIndex[i];
            //is index loaded?
            if (oi != null) {
                int p = oi.write();
                if (p != e.objIndexPages[i]) {
                    markDirty();
                }
                e.objIndexPages[i] = p;
            }
        }
        //write attr indices
        if (e.writeAttrIndices()) {
            markDirty();
        }
    }

    if (!isDirty()) {
        //Nothing gets rewritten. The pages in 'pageIDs' were recorded during the previous
        //read or write, they must not be reported as free here.
        return pageId;
    }

    //report free pages from previous read or write
    //TODO this will only be used if we have many schemas or many versions.... Hardly tested yet.
    for (int pID: pageIDs) {
        file.reportFreePage(pID);
    }
    //This has to be cleared before writing, because writing may record new pages.
    pageIDs.clear();

    //now write the index directory
    //we can do this only afterwards, because we need to know the pages of the indices
    pageId = out.allocateAndSeekAP(PAGE_TYPE.SCHEMA_INDEX, pageId, -1);
    //TODO we should use a PagedObjectAccess here. This means that we treat SchemaIndexEntries
    //as objects, but would also allow proper use of FSM for them.
    //number of indices
    out.writeInt(schemaIndex.size());
    //write the index directory
    for (SchemaIndexEntry e: schemaIndex.values()) {
        e.write(out);
    }
    out.flush();
    markClean();

    if (isResetRequired) {
        txIdOfLastWrite = txId;
        isResetRequired = false;
    }
    if (isRefreshRequired) {
        txIdOfLastWriteThatRequiresRefresh = txId;
        isRefreshRequired = false;
    }
    return pageId;
}
/**
 * @param def The class definition
 * @return Schema indexes for the schema ID of the given class definition, or whatever
 * the underlying map returns if there is no entry for that schema ID.
 */
public SchemaIndexEntry getSchema(ZooClassDef def) {
    final long schemaId = def.getSchemaId();
    return schemaIndex.get(schemaId);
}
/**
 * Looks up the index entry of a schema.
 *
 * @param schemaId ID of the schema, not the OID!
 * @return Schema indexes
 */
public SchemaIndexEntry getSchema(long schemaId) {
    final SchemaIndexEntry entry = schemaIndex.get(schemaId);
    return entry;
}
/**
 * @return Unmodifiable view of all index entries
 */
public Collection<SchemaIndexEntry> getSchemata() {
    return Collections.unmodifiableCollection(this.schemaIndex.values());
}
/**
 * @return 'true' if the index directory needs to be rewritten
 */
private boolean isDirty() {
    return this.isDirty;
}

/**
 * Flags the index directory as 'needs to be rewritten'.
 */
private void markDirty() {
    this.isDirty = true;
}

/**
 * Resets the dirty flag. This is called after the index directory has been written.
 */
private void markClean() {
    this.isDirty = false;
}
/**
 * Re-reads the given class definition via the disk access object and then flags all
 * fields for which an index exists as indexed and unique/non-unique.
 *
 * @param def The class definition to refresh
 * @param dao The disk access object that is used to re-read the definition
 */
public void refreshSchema(ZooClassDef def, DiskAccessOneFile dao) {
    final SchemaIndexEntry entry = getSchema(def);
    if (entry == null) {
        throw DBLogger.newFatal("Schema refresh failed: " + def.getClassName());
    }

    dao.readObject(def).processResult();

    //and check for indices
    //TODO maybe we do not need this for a refresh...
    for (ZooFieldDef field: def.getAllFields()) {
        if (entry.getIndex(field) == null) {
            continue;
        }
        field.setIndexed(true);
        field.setUnique(entry.isUnique(field));
    }
}
/**
 * Reads the class definitions of all versions of all schemata and links them: versions,
 * super classes, fields (including index flags and field types) and finally the proxies.
 *
 * @param dao The disk access object that is used to read the class definitions
 * @param node The node whose session is assigned to newly created proxies
 * @return List of all schemata in the database. These are loaded when the database is opened.
 */
public Collection<ZooClassDef> readSchemaAll(DiskAccessOneFile dao, Node node) {
    final HashMap<Long, ZooClassDef> defsByOid = new HashMap<Long, ZooClassDef>();
    for (SchemaIndexEntry entry: schemaIndex.values()) {
        for (long oid: entry.schemaOids) {
            final ZooClassDef loaded = (ZooClassDef) dao.readObject(oid);
            defsByOid.put( loaded.getOid(), loaded );
        }
    }

    // assign versions
    for (ZooClassDef def: defsByOid.values()) {
        def.associateVersions(defsByOid);
    }

    // assign super classes
    for (ZooClassDef def: defsByOid.values()) {
        if (def.getSuperOID() == 0) {
            continue;
        }
        def.associateSuperDef( defsByOid.get(def.getSuperOID()) );
    }

    //associate fields
    for (ZooClassDef def: defsByOid.values()) {
        def.associateFields();
        //and check for indices
        final SchemaIndexEntry entry = getSchema(def);
        for (ZooFieldDef field: def.getAllFields()) {
            if (entry.getIndex(field) != null) {
                field.setIndexed(true);
                field.setUnique(entry.isUnique(field));
            }
            if (field.getTypeOID() > 0) {
                field.setType(defsByOid.get(field.getTypeOID()));
            }
        }
    }

    //build proxy structure
    for (ZooClassDef def: defsByOid.values()) {
        if (def.getVersionProxy() != null) {
            continue;
        }
        //find the latest version
        ZooClassDef newest = def;
        for (ZooClassDef next = newest.getNextVersion(); next != null;
                next = newest.getNextVersion()) {
            newest = next;
        }
        //this associates proxies to super-classes and previous versions recursively
        newest.associateProxy( new ZooClassProxy(newest, node.getSession()) );
    }

    return defsByOid.values();
}
/**
 * Registers a new index entry for the given class definition. The schema index is
 * marked as 'reset required' (even if the call fails) and as dirty.
 *
 * @param def The class definition of the new schema
 */
public void defineSchema(ZooClassDef def) {
    markResetRequired();

    // check if such an entry exists!
    final SchemaIndexEntry existing = getSchema(def);
    if (existing != null) {
        throw DBLogger.newFatal("Schema is already defined: " + def.getClassName() +
                " oid=" + Util.oidToString(def.getOid()));
    }

    final SchemaIndexEntry created = new SchemaIndexEntry(file, def);
    schemaIndex.put(def.getSchemaId(), created);
    markDirty();
}
/**
 * Removes the index entry of the given schema and clears all its field indices and
 * pos-indices. The schema index is marked as 'reset required' and as dirty.
 *
 * Field indices and pos-indices are loaded lazily. Indices that have not been loaded
 * yet are loaded here before they are cleared, instead of failing with a
 * NullPointerException.
 *
 * @param sch The proxy of the schema to remove
 */
public void undefineSchema(ZooClassProxy sch) {
    markResetRequired();
    //We remove it from known schema list.
    SchemaIndexEntry entry = schemaIndex.remove(sch.getSchemaId());
    markDirty();
    if (entry == null) {
        throw DBLogger.newUser("Schema not found: " + sch.getName());
    }

    //field indices
    for (FieldIndex fi: entry.fieldIndices) {
        if (fi.index == null) {
            //Lazy loading. There is no field definition available here, so the kind of
            //index is derived from the stored type instead of field.isString().
            //NOTE(review): this assumes that FTYPE.STRING is used exactly for fields
            //for which isString() returns 'true' -- please confirm.
            if (fi.isUnique && fi.fType != FTYPE.STRING) {
                fi.index = IndexFactory.loadUniqueIndex(PAGE_TYPE.FIELD_INDEX, file, fi.page);
            } else {
                fi.index = IndexFactory.loadIndex(PAGE_TYPE.FIELD_INDEX, file, fi.page);
            }
        }
        fi.index.clear();
    }

    //pos index
    for (int i = 0; i < entry.objIndex.length; i++) {
        if (entry.objIndex[i] == null) {
            //lazy loading
            entry.objIndex[i] = PagedPosIndex.loadIndex(file, entry.objIndexPages[i]);
        }
        entry.objIndex[i].clear();
    }

    entry.objIndex = null;
    entry.schemaOids = null;
    entry.objIndexPages = null;
}
/**
 * Adds a new schema version to the existing index entry of the schema. The schema
 * index is marked as 'reset required' and as dirty.
 *
 * @param defNew The class definition of the new schema version. An index entry for its
 * schema ID must exist, otherwise a fatal error is thrown.
 */
public void newSchemaVersion(ZooClassDef defNew) {
    markResetRequired();
    //add a new version to the existing entry
    SchemaIndexEntry entry = schemaIndex.get(defNew.getSchemaId());
    if (entry == null) {
        //fail with a meaningful message instead of a NullPointerException
        throw DBLogger.newFatal("Schema not found: " + defNew.getClassName());
    }
    entry.addVersion(defNew);
    markDirty();
}
/**
 * Debugging helper that collects the page IDs reported by all field indices of all
 * schemata. This does not load indices, i.e. it expects all field indices to be loaded.
 *
 * @return New list with the collected page IDs
 */
public ArrayList<Integer> debugPageIdsAttrIdx() {
    final ArrayList<Integer> pages = new ArrayList<Integer>();
    for (SchemaIndexEntry entry: schemaIndex.values()) {
        final ArrayList<FieldIndex> records = entry.fieldIndices;
        for (int i = 0; i < records.size(); i++) {
            pages.addAll(records.get(i).index.debugPageIds());
        }
    }
    return pages;
}
/**
 * Notifies the schema index that a schema was renamed. The index does not store class
 * names, so this only marks the schema index as 'reset required'.
 *
 * @param def The class definition (not used)
 * @param newName The new name (not used)
 */
public void renameSchema(ZooClassDef def, String newName) {
    //TODO remove this method, should be automatically rewritten if ClassDef is dirty.
    //Nothing to do, just rewrite it here.
    this.markResetRequired();
}
/**
 * Discards all index entries in memory and re-reads the index directory from the given
 * page. Pending 'reset required' and 'refresh required' flags are cleared.
 *
 * @param rootPage The page of the index directory to revert to
 * @param schemaTxId The transaction ID that is recorded as ID of the last write that
 * requires a reset and as ID of the last write that requires a refresh
 */
public void revert(int rootPage, long schemaTxId) {
    //reload the directory
    schemaIndex.clear();
    pageId = rootPage;
    readIndex();

    //reset change tracking
    isRefreshRequired = false;
    isResetRequired = false;
    txIdOfLastWriteThatRequiresRefresh = schemaTxId;
    txIdOfLastWrite = schemaTxId;
}
/**
 * Counts the instances of a class by summing up the sizes of the pos-indices of all
 * schema versions.
 *
 * @param def The proxy of the class
 * @param subClasses Whether instances of sub-classes should be counted recursively
 * @return The number of instances
 */
public long countInstances(ZooClassProxy def, boolean subClasses) {
    final SchemaIndexEntry entry = getSchema(def.getSchemaId());
    long total = 0;
    int version = 0;
    while (version < entry.getObjectIndexVersionCount()) {
        total += entry.getObjectIndexVersion(version).size();
        version++;
    }
    if (!subClasses) {
        return total;
    }
    for (ZooClassProxy sub: def.getSubProxies()) {
        total += countInstances(sub, true);
    }
    return total;
}
/**
 * Callback for page overflows while reading. The page is recorded so that write() can
 * report it as free.
 *
 * @param currentPage The page reported by the reader
 */
public void notifyOverflowRead(int currentPage) {
    this.pageIDs.add(Integer.valueOf(currentPage));
}
/**
 * Callback for page overflows while writing. The page is recorded so that a later
 * write() can report it as free.
 *
 * @param currentPage The page reported by the writer
 */
public void notifyOverflowWrite(int currentPage) {
    this.pageIDs.add(Integer.valueOf(currentPage));
}
/**
 * Debugging helper.
 *
 * @return New list with all pages recorded by the overflow callbacks, followed by the
 * page of the index directory
 */
public ArrayList<Integer> debugGetPages() {
    final ArrayList<Integer> pages = new ArrayList<Integer>(pageIDs);
    pages.add(pageId);
    return pages;
}
/**
 * @return Transaction ID that was recorded by the last write() with a pending
 * 'reset required' flag or by the last revert(); -1 if there was none.
 */
public long getTxIdOfLastWrite() {
    return this.txIdOfLastWrite;
}
/**
 * Mark that a reset of concurring sessions is required, for example if the schema was
 * changed. The flag is cleared by the next write() that rewrites the index directory
 * or by revert().
 */
public void markResetRequired() {
    this.isResetRequired = true;
}
/**
 * @return Id of the last transaction which requires a refresh for concurrent session,
 * for example if an index was added or removed. This is -1 if no such transaction
 * has been recorded.
 */
public long getTxIdOfLastWriteThatRequiresRefresh() {
    return this.txIdOfLastWriteThatRequiresRefresh;
}
/**
 * Mark that a refresh of concurring sessions is required. This is called when an index
 * is defined or removed. The flag is cleared by the next write() that rewrites the
 * index directory or by revert().
 */
void markRefreshRequired() {
    this.isRefreshRequired = true;
}
}