/* * Copyright 2009-2016 Tilmann Zaeschke. All rights reserved. * * This file is part of ZooDB. * * ZooDB is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * ZooDB is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with ZooDB. If not, see <http://www.gnu.org/licenses/>. * * See the README and COPYING files for further information. */ package org.zoodb.internal.server.index; import java.io.IOException; import java.lang.reflect.Type; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import org.zoodb.api.impl.ZooPC; import org.zoodb.internal.Node; import org.zoodb.internal.PersistentSchemaOperation; import org.zoodb.internal.ZooClassDef; import org.zoodb.internal.ZooClassProxy; import org.zoodb.internal.ZooFieldDef; import org.zoodb.internal.server.CallbackPageRead; import org.zoodb.internal.server.CallbackPageWrite; import org.zoodb.internal.server.DiskAccessOneFile; import org.zoodb.internal.server.StorageChannel; import org.zoodb.internal.server.StorageChannelInput; import org.zoodb.internal.server.StorageChannelOutput; import org.zoodb.internal.server.DiskIO.PAGE_TYPE; import org.zoodb.internal.server.index.PagedPosIndex.ObjectPosIteratorMerger; import org.zoodb.internal.util.DBLogger; import org.zoodb.internal.util.PrimLongMapLI; import org.zoodb.internal.util.Util; /** * Schema Index. This class manages the indices in the database. The indices are stored separately * from the schemata. Since schemas are not objects, they are referenced only by pageId, which * changes every time that an index changes. To avoid rewriting all schemata every time the indices * change, this class was introduced as a compressed version of the schemata. This should avoid * unnecessary page writes for rewriting the schemata. * * Structure * ========= * For each schema, we store a list of indices for all fields that are indexed. This list is * compatible only with the latest version of the schema. Field-indices from older versions are * removed (or move to the latest version, if they still exist). * * The pos-indices are different. We have one pos-index for each version of a schema. This is * necessary in the case that a field+index are added. A query that matches the default value * of the new field should return also all objects that have not been evolved yet (lazy evolution). * This is only possible if we maintain list of objects separately for each applicable schema * version. * * * @author ztilmann * */ public class SchemaIndex implements CallbackPageRead, CallbackPageWrite { //This maps the schemaId (not the OID!) to the SchemaIndexEntry private final PrimLongMapLI<SchemaIndexEntry> schemaIndex = new PrimLongMapLI<SchemaIndexEntry>(); private int pageId = -1; private final StorageChannel file; private final StorageChannelOutput out; private final StorageChannelInput in; private boolean isDirty = false; private final ArrayList<Integer> pageIDs = new ArrayList<Integer>(); //updates that require re-opening the database connection private boolean isResetRequired = false; private long txIdOfLastWrite = -1; //updates that can be solved with refresh private boolean isRefreshRequired = false; private long txIdOfLastWriteThatRequiresRefresh = -1; private static class FieldIndex { //This is the unique fieldId which is maintained throughout different versions of the field private long fieldId; private boolean isUnique; private FTYPE fType; private int page; private LongLongIndex index; } public static enum FTYPE { LONG(8, Long.TYPE, "long"), INT(4, Integer.TYPE, "int"), SHORT(2, Short.TYPE, "short"), BYTE(1, Byte.TYPE, "byte"), DOUBLE(8, Double.TYPE, "double"), FLOAT(4, Float.TYPE, "float"), CHAR(2, Character.TYPE, "char"), STRING(8, null, "java.lang.String"), REF(8, Long.TYPE, ZooPC.class.getName()); // private final int len; // private final Type type; private final String typeName; private FTYPE(int len, Type type, String typeName) { // this.len = len; // this.type = type; this.typeName = typeName; } public static FTYPE fromType(ZooFieldDef fieldType) { if (fieldType.isPersistentType()) { return REF; } String typeName = fieldType.getTypeName(); for (FTYPE t: values()) { if (t.typeName.equals(typeName)) { return t; } } throw new IllegalArgumentException("Type is not indexable: " + typeName); } } /** * Do not store classes here. On the server, the class may not be available. * * Otherwise it would be nice, because comparison of references to classes is faster than * Strings, and references require much less space. Then again, there are few schema classes, * so space is not a problem. */ public class SchemaIndexEntry { private final long schemaId; private long[] schemaOids; //Do not store classes here! See above. //We also do not store the class name, as it uses a lot of space, especially since //we do not return pages to FSM except the last one. private int[] objIndexPages; private transient PagedPosIndex[] objIndex; private ArrayList<FieldIndex> fieldIndices = new ArrayList<FieldIndex>(); /** * Constructor for reading index. */ private SchemaIndexEntry(StorageChannelInput in) { schemaId = in.readLong(); int nVersion = in.readShort(); schemaOids = new long[nVersion]; for (int i = 0; i < nVersion; i++) { schemaOids[i] = in.readLong(); } objIndexPages = new int[nVersion]; for (int i = 0; i < nVersion; i++) { objIndexPages[i] = in.readInt(); } objIndex = new PagedPosIndex[nVersion]; int nF = in.readShort(); for (int i = 0; i < nF; i++) { FieldIndex fi = new FieldIndex(); fieldIndices.add(fi); fi.fieldId = in.readLong(); fi.fType = FTYPE.values()[in.readByte()]; fi.isUnique = in.readBoolean(); fi.page = in.readInt(); } } /** * Constructor for creating new Index. * @param id * @param cName * @param schPage * @param schPageOfs * @param raf * @param def * @throws IOException */ private SchemaIndexEntry(StorageChannel file, ZooClassDef def) { this.schemaId = def.getSchemaId(); this.schemaOids = new long[1]; this.schemaOids[0] = def.getOid(); this.objIndex = new PagedPosIndex[1]; this.objIndex[0] = PagedPosIndex.newIndex(file); this.objIndexPages = new int[1]; } private void write(StorageChannelOutput out) { out.writeLong(schemaId); out.writeShort((short) schemaOids.length); for (long oid: schemaOids) { out.writeLong(oid); } for (int page: objIndexPages) { out.writeInt(page); //no data page yet } out.writeShort((short) fieldIndices.size()); for (FieldIndex fi: fieldIndices) { out.writeLong(fi.fieldId); out.writeByte((byte) fi.fType.ordinal()); out.writeBoolean(fi.isUnique); out.writeInt(fi.page); } } /** * @return The pos-index for the latest schema version */ public PagedPosIndex getObjectIndexLatestSchemaVersion() { // lazy loading int v = objIndex.length-1; if (objIndex[v] == null) { objIndex[v] = PagedPosIndex.loadIndex(file, objIndexPages[objIndex.length-1]); } return objIndex[v]; } /** * * @return Pos-indices for all schema versions */ public ObjectPosIteratorMerger getObjectIndexIterator() { // lazy loading ObjectPosIteratorMerger ret = new ObjectPosIteratorMerger(); for (int i = 0; i < objIndex.length; i++) { if (objIndex[i] == null) { objIndex[i] = PagedPosIndex.loadIndex(file, objIndexPages[i]); } ret.add(objIndex[i].iteratorObjects()); } return ret; } public LongLongIndex defineIndex(ZooFieldDef field, boolean isUnique) { //double check if (!field.isPrimitiveType() && !field.isString() && !field.isPersistentType()) { throw new IllegalArgumentException("Type cannot be indexed: " + field.getTypeName()); } for (FieldIndex fi: fieldIndices) { if (fi.fieldId == field.getFieldSchemaId()) { throw new IllegalArgumentException( "Index is already defined: " + field.getName()); } } FieldIndex fi = new FieldIndex(); fi.fieldId = field.getFieldSchemaId(); fi.fType = FTYPE.fromType(field); fi.isUnique = isUnique; field.setIndexed(true); field.setUnique(isUnique); //unique String indexes use a non-unique index! if (isUnique && !field.isString()) { fi.index = IndexFactory.createUniqueIndex(PAGE_TYPE.FIELD_INDEX, file); } else { fi.index = IndexFactory.createIndex(PAGE_TYPE.FIELD_INDEX, file); } fieldIndices.add(fi); markRefreshRequired(); return fi.index; } public boolean removeIndex(ZooFieldDef field) { Iterator<FieldIndex> iter = fieldIndices.iterator(); while (iter.hasNext()) { FieldIndex fi = iter.next(); if (fi.fieldId == field.getFieldSchemaId()) { iter.remove(); fi.index.clear(); field.setIndexed(false); markRefreshRequired(); return true; } } return false; } public LongLongIndex getIndex(ZooFieldDef field) { for (FieldIndex fi: fieldIndices) { if (fi.fieldId == field.getFieldSchemaId()) { if (fi.index == null) { if (fi.isUnique && !field.isString()) { fi.index = IndexFactory.loadUniqueIndex(PAGE_TYPE.FIELD_INDEX, file, fi.page); } else { fi.index = IndexFactory.loadIndex(PAGE_TYPE.FIELD_INDEX, file, fi.page); } } return fi.index; } } return null; } public ArrayList<LongLongIndex> getIndices() { ArrayList<LongLongIndex> indices = new ArrayList<LongLongIndex>(); for (FieldIndex fi: fieldIndices) { indices.add(fi.index); } return indices; } public ArrayList<AbstractPagedIndex> clearIndices() { ArrayList<AbstractPagedIndex> indices = new ArrayList<AbstractPagedIndex>(); for (FieldIndex fi: fieldIndices) { fi.index.clear(); } return indices; } public boolean isUnique(ZooFieldDef field) { for (FieldIndex fi: fieldIndices) { if (fi.fieldId == field.getFieldSchemaId()) { return fi.isUnique; } } throw new IllegalArgumentException("Index not found for " + field.getName()); } /** * * @return True if any indices were written. */ private boolean writeAttrIndices() { boolean dirty = false; for (FieldIndex fi: fieldIndices) { //is index loaded? if (fi.index != null && fi.index.isDirty()) { fi.page = fi.index.write(); dirty = true; } } return dirty; } void addVersion(ZooClassDef defNew) { int newLen = defNew.getSchemaVersion() + 1; schemaOids = Arrays.copyOf(schemaOids, newLen); objIndexPages = Arrays.copyOf(objIndexPages, newLen); objIndex = Arrays.copyOf(objIndex, newLen); objIndex[newLen-1] = PagedPosIndex.newIndex(file); schemaOids[newLen-1] = defNew.getOid(); //remove indexes for deleted fields for (PersistentSchemaOperation op: defNew.getEvolutionOps()) { if (op.isAddOp() && op.getField().isIndexed()) { ZooFieldDef field = op.getField(); FieldIndex fi = new FieldIndex(); fi.fieldId = op.getFieldId(); fi.fType = FTYPE.fromType(field); fi.isUnique = field.isIndexUnique(); if (fi.isUnique && !field.isString()) { fi.index = IndexFactory.createUniqueIndex(PAGE_TYPE.FIELD_INDEX, file); } else { fi.index = IndexFactory.createIndex(PAGE_TYPE.FIELD_INDEX, file); } fieldIndices.add(fi); } else { for (int i = 0; i < fieldIndices.size(); i++) { if (fieldIndices.get(i).fieldId == op.getFieldId()) { FieldIndex fi = fieldIndices.remove(i); fi.index.clear(); } } } } } public PagedPosIndex getObjectIndexVersion(int version) { // lazy loading if (objIndex[version] == null) { objIndex[version] = PagedPosIndex.loadIndex(file, objIndexPages[version]); } return objIndex[version]; } public int getObjectIndexVersionCount() { return objIndex.length; } } public SchemaIndex(StorageChannel file, int indexPage1, boolean isNew) { this.isDirty = isNew; this.file = file; this.in = file.getReader(true); this.out = file.getWriter(true); this.pageId = indexPage1; if (!isNew) { readIndex(); } in.setOverflowCallbackRead(this); out.setOverflowCallbackWrite(this); } private void readIndex() { in.seekPageForRead(PAGE_TYPE.SCHEMA_INDEX, pageId); int nIndex = in.readInt(); for (int i = 0; i < nIndex; i++) { SchemaIndexEntry entry = new SchemaIndexEntry(in); schemaIndex.put(entry.schemaId, entry); } } public int write(long txId) { //report free pages from previous read or write for (int pID: pageIDs) { //TODO this will only be used if we have many schemas or many versions.... Hardly tested yet. System.out.println("Reporting: " + pID);//TODO file.reportFreePage(pID); } pageIDs.clear(); //write the indices for (SchemaIndexEntry e: schemaIndex.values()) { //for (PagedPosIndex oi: e.objIndex) { for (int i = 0; i < e.objIndex.length; i++) { PagedPosIndex oi = e.objIndex[i]; if (oi != null) { int p = oi.write(); if (p != e.objIndexPages[i]) { markDirty(); } e.objIndexPages[i] = p; } } //write attr indices if (e.writeAttrIndices()) { markDirty(); } } if (!isDirty()) { return pageId; } //now write the index directory //we can do this only afterwards, because we need to know the pages of the indices pageId = out.allocateAndSeekAP(PAGE_TYPE.SCHEMA_INDEX, pageId, -1); //TODO we should use a PagedObjectAccess here. This means that we treat SchemaIndexEntries //as objects, but would also allow proper use of FSM for them. //number of indices out.writeInt(schemaIndex.size()); //write the index directory for (SchemaIndexEntry e: schemaIndex.values()) { e.write(out); } out.flush(); markClean(); if (isResetRequired) { txIdOfLastWrite = txId; isResetRequired = false; } if (isRefreshRequired) { txIdOfLastWriteThatRequiresRefresh = txId; isRefreshRequired = false; } return pageId; } public SchemaIndexEntry getSchema(ZooClassDef def) { return schemaIndex.get(def.getSchemaId()); } /** * * @param schemaId ID of the schema, not the OID! * @return Schema indexes */ public SchemaIndexEntry getSchema(long schemaId) { return schemaIndex.get(schemaId); } public Collection<SchemaIndexEntry> getSchemata() { return Collections.unmodifiableCollection(schemaIndex.values()); } private final boolean isDirty() { return isDirty; } private final void markDirty() { isDirty = true; } private final void markClean() { isDirty = false; } public void refreshSchema(ZooClassDef def, DiskAccessOneFile dao) { SchemaIndexEntry e = getSchema(def); if (e == null) { throw DBLogger.newFatal("Schema refresh failed: " + def.getClassName()); } dao.readObject(def).processResult(); //and check for indices //TODO maybe we do not need this for a refresh... for (ZooFieldDef f: def.getAllFields()) { if (e.getIndex(f) != null) { f.setIndexed(true); f.setUnique(e.isUnique(f)); } } } /** * @param node * @return List of all schemata in the database. These are loaded when the database is opened. */ public Collection<ZooClassDef> readSchemaAll(DiskAccessOneFile dao, Node node) { HashMap<Long, ZooClassDef> ret = new HashMap<Long, ZooClassDef>(); for (SchemaIndexEntry se: schemaIndex.values()) { for (long schemaOid: se.schemaOids) { ZooClassDef def = (ZooClassDef) dao.readObject(schemaOid); ret.put( def.getOid(), def ); } } // assign versions for (ZooClassDef def: ret.values()) { def.associateVersions(ret); } // assign super classes for (ZooClassDef def: ret.values()) { if (def.getSuperOID() != 0) { def.associateSuperDef( ret.get(def.getSuperOID()) ); } } //associate fields for (ZooClassDef def: ret.values()) { def.associateFields(); //and check for indices SchemaIndexEntry se = getSchema(def); for (ZooFieldDef f: def.getAllFields()) { if (se.getIndex(f) != null) { f.setIndexed(true); f.setUnique(se.isUnique(f)); } if (f.getTypeOID() > 0) { f.setType(ret.get(f.getTypeOID())); } } } //build proxy structure for (ZooClassDef def: ret.values()) { if (def.getVersionProxy() == null) { ZooClassDef latest = def; while (latest.getNextVersion() != null) { latest = latest.getNextVersion(); } //this associates proxies to super-classes and previous versions recursively latest.associateProxy( new ZooClassProxy(latest, node.getSession()) ); } } return ret.values(); } public void defineSchema(ZooClassDef def) { markResetRequired(); // check if such an entry exists! if (getSchema(def) != null) { throw DBLogger.newFatal("Schema is already defined: " + def.getClassName() + " oid=" + Util.oidToString(def.getOid())); } SchemaIndexEntry entry = new SchemaIndexEntry(file, def); schemaIndex.put(def.getSchemaId(), entry); markDirty(); } public void undefineSchema(ZooClassProxy sch) { markResetRequired(); //We remove it from known schema list. SchemaIndexEntry entry = schemaIndex.remove(sch.getSchemaId()); markDirty(); if (entry == null) { throw DBLogger.newUser("Schema not found: " + sch.getName()); } //field indices for (FieldIndex fi: entry.fieldIndices) { fi.index.clear(); } //pos index for (PagedPosIndex oi: entry.objIndex) { oi.clear(); } entry.objIndex = null; entry.schemaOids = null; entry.objIndexPages = null; } public void newSchemaVersion(ZooClassDef defNew) { markResetRequired(); //add a new version to the existing entry SchemaIndexEntry entry = schemaIndex.get(defNew.getSchemaId()); entry.addVersion(defNew); markDirty(); } public ArrayList<Integer> debugPageIdsAttrIdx() { ArrayList<Integer> ret = new ArrayList<Integer>(); for (SchemaIndexEntry e: schemaIndex.values()) { for (FieldIndex fi: e.fieldIndices) { ret.addAll(fi.index.debugPageIds()); } } return ret; } public void renameSchema(ZooClassDef def, String newName) { //Nothing to do, just rewrite it here. //TODO remove this method, should be automatically rewritten if ClassDef is dirty. markResetRequired(); } public void revert(int rootPage, long schemaTxId) { schemaIndex.clear(); pageId = rootPage; readIndex(); txIdOfLastWrite = schemaTxId; txIdOfLastWriteThatRequiresRefresh = schemaTxId; isResetRequired = false; isRefreshRequired = false; } public long countInstances(ZooClassProxy def, boolean subClasses) { SchemaIndexEntry entry = getSchema(def.getSchemaId()); long n = 0; for (int i = 0; i < entry.getObjectIndexVersionCount(); i++) { PagedPosIndex objInd = entry.getObjectIndexVersion(i); n += objInd.size(); } if (subClasses) { for (ZooClassProxy sub: def.getSubProxies()) { n += countInstances(sub, true); } } return n; } public void notifyOverflowRead(int currentPage) { pageIDs.add(currentPage); } public void notifyOverflowWrite(int currentPage) { pageIDs.add(currentPage); } public ArrayList<Integer> debugGetPages() { ArrayList<Integer> ret = new ArrayList<Integer>(); ret.addAll(pageIDs); ret.add(pageId); return ret; } public long getTxIdOfLastWrite() { return txIdOfLastWrite; } /** * Mark that a reset of concurring sessions is required, for example if the schema was changed. */ public void markResetRequired() { isResetRequired = true; } /** * * @return Id of the last transaction which requires a refresh for concurrent session, * for example if an index was added or removed. */ public long getTxIdOfLastWriteThatRequiresRefresh() { return txIdOfLastWriteThatRequiresRefresh; } void markRefreshRequired() { isRefreshRequired = true; } }