/* * Copyright 2012 NGDATA nv * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.lilyproject.indexer.derefmap; import java.io.IOException; import java.util.Collection; import java.util.HashSet; import java.util.Map; import java.util.Set; import com.google.common.collect.Sets; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.util.Bytes; import org.lilyproject.hbaseindex.Index; import org.lilyproject.hbaseindex.IndexDefinition; import org.lilyproject.hbaseindex.IndexEntry; import org.lilyproject.hbaseindex.IndexManager; import org.lilyproject.hbaseindex.IndexNotFoundException; import org.lilyproject.hbaseindex.Query; import org.lilyproject.hbaseindex.QueryResult; import org.lilyproject.repository.api.AbsoluteRecordId; import org.lilyproject.repository.api.IdGenerator; import org.lilyproject.repository.api.RecordId; import org.lilyproject.repository.api.SchemaId; import org.lilyproject.util.hbase.HBaseTableFactory; import org.lilyproject.util.io.Closer; /** * */ public class DerefMapHbaseImpl implements DerefMap { private static final byte[] DEPENDENCIES_KEY = Bytes.toBytes("dependencies"); private static final byte[] FIELDS_KEY = Bytes.toBytes("fields"); private static final byte[] DUMMY_IDENTIFIER = new byte[]{0}; private Index forwardDerefIndex; private Index backwardDerefIndex; private DerefMapSerializationUtil serializationUtil; /** * Private constructor. Clients should use static factory methods {@link #delete(String, * org.apache.hadoop.conf.Configuration)} and {@link #create(String, Configuration, HBaseTableFactory, * IdGenerator)} */ private DerefMapHbaseImpl(final String owningRepoName, final String indexName, final Configuration hbaseConfiguration, final HBaseTableFactory tableFactory, final IdGenerator idGenerator) throws IndexNotFoundException, IOException, InterruptedException { this.serializationUtil = new DerefMapSerializationUtil(idGenerator); final IndexManager indexManager = new IndexManager(hbaseConfiguration, tableFactory); IndexDefinition forwardIndexDef = new IndexDefinition(forwardIndexName(indexName)); // For the record ID we use a variable length byte array field of which the first two bytes are fixed length // The first byte is actually the record identifier byte. // The second byte really is the first byte of the record id. We put this in the fixed length part // (safely because a record id should at least be a single byte long) because this prevents BCD encoding // on the first byte, thus making it easier to configure table splitting based on the original input. forwardIndexDef.addVariableLengthByteField("dependant_recordid", 2); forwardIndexDef.addByteField("dependant_vtag", DerefMapSerializationUtil.SCHEMA_ID_BYTE_LENGTH); forwardDerefIndex = indexManager.getIndex(owningRepoName, forwardIndexDef); IndexDefinition backwardIndexDef = new IndexDefinition(backwardIndexName(indexName)); // Same remark as in the forward index. backwardIndexDef.addVariableLengthByteField("dependency_masterrecordid", 2); backwardIndexDef.addByteField("dependant_vtag", DerefMapSerializationUtil.SCHEMA_ID_BYTE_LENGTH); backwardIndexDef.addVariableLengthByteField("variant_properties_pattern"); backwardDerefIndex = indexManager.getIndex(owningRepoName, backwardIndexDef); } /** * Create a DerefMap for a given index. If this is the first time the DerefMap is constructed for this index, * the forward and backward index tables will be created. * * @param indexName name of the index * @param hbaseConfiguration hbase configuration * @param idGenerator id generator * @throws IndexNotFoundException * @throws IOException * @throws InterruptedException */ public static DerefMap create(final String owningRepoName, final String indexName, final Configuration hbaseConfiguration, final HBaseTableFactory tableFactory, final IdGenerator idGenerator) throws IndexNotFoundException, IOException, InterruptedException { return new DerefMapHbaseImpl(owningRepoName, indexName, hbaseConfiguration, tableFactory, idGenerator); } /** * Delete a DerefMap. This will delete the corresponding hbase tables. * * @param indexName name of the index to delete * @param hbaseConfiguration hbase configuration * @throws IOException * @throws IndexNotFoundException if the index doesn't exist (maybe it was already deleted?) */ public static void delete(final String indexName, final Configuration hbaseConfiguration) throws IOException, IndexNotFoundException { final IndexManager manager = new IndexManager(hbaseConfiguration); manager.deleteIndex(forwardIndexName(indexName)); manager.deleteIndex(backwardIndexName(indexName)); } public static String forwardIndexName(String indexName) { return "deref-forward-" + indexName; } public static String backwardIndexName(String indexName) { return "deref-backward-" + indexName; } @Override public void updateDependants(AbsoluteRecordId parentRecordId, SchemaId parentVtagId, Map<DependencyEntry, Set<SchemaId>> newDependantEntries) throws IOException { final Set<DependencyEntry> existingEntries = findDependencies(parentRecordId, parentVtagId); // Figure out what changed final Set<DependencyEntry> removedDependencies = figureOutRemovedDependencies(newDependantEntries.keySet(), existingEntries); final Collection<DependencyEntry> addedDependencies = figureOutAddedDependencies(newDependantEntries.keySet(), existingEntries); // IMPORTANT implementation note: the order in which changes are applied is not arbitrary. It is such that if // the process would fail in between, there will never be left any state in the backward index which would not // be found via the forward index. // delete removed from bwd index for (DependencyEntry removed : removedDependencies) { final IndexEntry backwardEntry = createBackwardEntry(removed.getDependency(), parentRecordId, parentVtagId, null, removed.getMoreDimensionedVariants()); backwardDerefIndex.removeEntry(backwardEntry); } // update fwd index (added and removed at the same time, it is a single row) final IndexEntry fwdEntry = createForwardEntry(parentRecordId, parentVtagId, newDependantEntries.keySet()); forwardDerefIndex.addEntry(fwdEntry); // add added to bwd idx for (DependencyEntry added : addedDependencies) { final Set<SchemaId> fields = newDependantEntries.get(added); final IndexEntry backwardEntry = createBackwardEntry(added.getDependency(), parentRecordId, parentVtagId, fields, added.getMoreDimensionedVariants()); backwardDerefIndex.addEntry(backwardEntry); } } private Set<DependencyEntry> figureOutRemovedDependencies(Collection<DependencyEntry> newDependencies, Set<DependencyEntry> existingDependencies) { final Set<DependencyEntry> removed = new HashSet<DependencyEntry>(); // add all existing removed.addAll(existingDependencies); // remove all new removed.removeAll(newDependencies); return removed; } private Collection<DependencyEntry> figureOutAddedDependencies(Set<DependencyEntry> newDependencyEntries, Set<DependencyEntry> existingDependencies) { final Set<DependencyEntry> added = new HashSet<DependencyEntry>(); // add all new added.addAll(newDependencyEntries); // remove all existing added.removeAll(existingDependencies); return added; } private IndexEntry createForwardEntry(AbsoluteRecordId parentRecordId, SchemaId parentVtagId, Collection<DependencyEntry> newDependencies) throws IOException { final IndexEntry fwdEntry = new IndexEntry(forwardDerefIndex.getDefinition()); fwdEntry.addField("dependant_recordid", parentRecordId.toBytes()); fwdEntry.addField("dependant_vtag", parentVtagId.getBytes()); // we do not really use the identifier... all we are interested in is in the data of the entry fwdEntry.setIdentifier(DUMMY_IDENTIFIER); // the data contains the dependencies of the dependant (master record ids and vtags) fwdEntry.addData(DEPENDENCIES_KEY, this.serializationUtil.serializeDependenciesForward(newDependencies)); return fwdEntry; } private IndexEntry createBackwardEntry(AbsoluteRecordId parentRecordId, AbsoluteRecordId dependantRecordId, SchemaId dependantVtagId, Set<SchemaId> fields, Set<String> moreDimensionedVariantProperties) throws IOException { final byte[] serializedVariantPropertiesPattern = this.serializationUtil.serializeVariantPropertiesPattern( this.serializationUtil.createVariantPropertiesPattern(parentRecordId.getRecordId().getVariantProperties(), moreDimensionedVariantProperties)); final IndexEntry bwdEntry = new IndexEntry(backwardDerefIndex.getDefinition()); bwdEntry.addField("dependency_masterrecordid", parentRecordId.getRecordId().getMaster().toBytes()); bwdEntry.addField("dependant_vtag", dependantVtagId.getBytes()); bwdEntry.addField("variant_properties_pattern", serializedVariantPropertiesPattern); // the identifier is the dependant which depends on the dependency bwdEntry.setIdentifier(dependantRecordId.toBytes()); // the fields which the dependant uses of the dependency (null if used for deleting the entry) if (fields != null) { bwdEntry.addData(FIELDS_KEY, this.serializationUtil.serializeFields(fields)); } return bwdEntry; } /** * Find the set of record ids (and corresponding version tags) on which a given record (in a given version tag) * depends. * * @param parentRecordId record id of the record to find dependencies for * @param vtag vtag of the record to find dependencies for * @return the record ids and vtags on which the given record depends */ Set<DependencyEntry> findDependencies(AbsoluteRecordId parentRecordId, SchemaId vtag) throws IOException { final Query query = new Query(); query.addEqualsCondition("dependant_recordid", parentRecordId.toBytes()); query.addEqualsCondition("dependant_vtag", vtag.getBytes()); final Set<DependencyEntry> result; final QueryResult queryResult = forwardDerefIndex.performQuery(query); if (queryResult.next() != null) { final byte[] serializedEntries = queryResult.getData(DEPENDENCIES_KEY); result = this.serializationUtil.deserializeDependenciesForward(serializedEntries); if (queryResult.next() != null) { throw new IllegalStateException( "Expected only a single matching entry in " + forwardDerefIndex.getDefinition().getName()); } } else { result = new HashSet<DependencyEntry>(); } // Not closed in finally block: avoid HBase contact when there could be connection problems. Closer.close(queryResult); return result; } @Override public DependantRecordIdsIterator findDependantsOf(AbsoluteRecordId parentRecordId, Set<SchemaId> fields, SchemaId vtag) throws IOException { final RecordId master = parentRecordId.getRecordId().getMaster(); final Query query = new Query(); query.addEqualsCondition("dependency_masterrecordid", master.toBytes()); if (vtag != null) { query.addEqualsCondition("dependant_vtag", vtag.getBytes()); } query.setIndexFilter(new DerefMapIndexFilter(parentRecordId.getRecordId().getVariantProperties(), fields)); return new DependantRecordIdsIteratorImpl(backwardDerefIndex.performQuery(query), this.serializationUtil); } @Override public DependantRecordIdsIterator findDependantsOf(AbsoluteRecordId parentRecordId, SchemaId field, SchemaId vtag) throws IOException { return findDependantsOf(parentRecordId, field == null ? null : Sets.newHashSet(field), vtag); } @Override public DependantRecordIdsIterator findDependantsOf(AbsoluteRecordId parentRecordId) throws IOException { return findDependantsOf(parentRecordId, (Set<SchemaId>) null, null); } }