/*
* Copyright 2010 Outerthought bvba
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.lilyproject.linkindex;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import com.google.common.collect.Sets;
import org.apache.hadoop.hbase.util.Bytes;
import org.lilyproject.hbaseindex.Index;
import org.lilyproject.hbaseindex.IndexDefinition;
import org.lilyproject.hbaseindex.IndexEntry;
import org.lilyproject.hbaseindex.IndexManager;
import org.lilyproject.hbaseindex.IndexNotFoundException;
import org.lilyproject.hbaseindex.Query;
import org.lilyproject.hbaseindex.QueryResult;
import org.lilyproject.linkindex.LinkIndexMetrics.Action;
import org.lilyproject.repository.api.AbsoluteRecordId;
import org.lilyproject.repository.api.IdGenerator;
import org.lilyproject.repository.api.RecordId;
import org.lilyproject.repository.api.RepositoryException;
import org.lilyproject.repository.api.RepositoryManager;
import org.lilyproject.repository.api.SchemaId;
import org.lilyproject.util.Pair;
import org.lilyproject.util.hbase.LilyHBaseSchema.Table;
import org.lilyproject.util.io.Closer;
/**
* The index of links that exist between documents.
*
* <p>Terminology:
*
* <ul>
* <li>referrers = backwards links = incoming links</li>
* <li>forward links = outgoing links</li>
* </ul>
*/
// IMPORTANT implementation note: the order in which changes are applied, first to the forward or first to
// the backward table, is not arbitrary. It is such that if the process would fail in between, there would
// never be left any state in the backward table which would not be found via the forward index.
public class LinkIndex {
/** Source of the repository from which the {@link IdGenerator} is obtained on first use. */
private final RepositoryManager repositoryManager;
/** Cached id generator; stays {@code null} until {@code getIdGenerator()} resolves it. */
private IdGenerator lazyIdGenerator;
/** Timing metrics reported by the index operations of this class. */
private final LinkIndexMetrics metrics;
/** Index keyed on the source record ("links-forward"): used to find outgoing links. */
private final Index forwardIndex;
/** Index keyed on the target record ("links-backward"): used to find referrers. */
private final Index backwardIndex;
/** Data key under which the id of the field holding the link is stored with an index entry. */
private static final byte[] SOURCE_FIELD_KEY = Bytes.toBytes("sf");
/** Data key under which the vtag id is stored with a forward index entry. */
private static final byte[] VTAG_KEY = Bytes.toBytes("vt");
/**
 * Creates the link index and obtains its two underlying indexes ("links-forward" and
 * "links-backward") from the given index manager.
 *
 * @param indexManager      used to obtain the forward and backward index
 * @param repositoryManager kept so that the id generator can be retrieved lazily later on
 */
public LinkIndex(final IndexManager indexManager, RepositoryManager repositoryManager) throws IndexNotFoundException, IOException,
        InterruptedException {
    this.metrics = new LinkIndexMetrics("linkIndex");
    this.repositoryManager = repositoryManager;

    // Field layout shared by both indexes:
    //  - the vtag comes after the record id, so that all entries of a record can be deleted
    //    without having to know the vtags under which they occur
    //  - the sourcefield will often be optional in queries, which is why it comes last
    final int schemaIdLength = 16; // see SchemaIdImpl

    // The record id is a variable length byte array field whose first two bytes are fixed length.
    // The first byte is the record identifier byte, the second one is the first byte of the actual
    // record id (safe, since a record id is at least a single byte long). Keeping that second byte
    // in the fixed length part prevents BCD encoding of it, which makes it easier to configure
    // table splitting based on the original input.
    final IndexDefinition forwardDefinition = new IndexDefinition("links-forward");
    forwardDefinition.addVariableLengthByteField("source", 2);
    forwardDefinition.addByteField("vtag", schemaIdLength);
    forwardDefinition.addByteField("sourcefield", schemaIdLength);
    this.forwardIndex = indexManager.getIndex(forwardDefinition);

    // Same layout as the forward index, but keyed on the link target instead of the source.
    final IndexDefinition backwardDefinition = new IndexDefinition("links-backward");
    backwardDefinition.addVariableLengthByteField("target", 2);
    backwardDefinition.addByteField("vtag", schemaIdLength);
    backwardDefinition.addByteField("sourcefield", schemaIdLength);
    this.backwardIndex = indexManager.getIndex(backwardDefinition);
}
/**
 * Deletes all links of a record, irrespective of the vtag.
 *
 * <p>The record id is converted to an absolute record id before delegating to
 * {@link #deleteLinks(AbsoluteRecordId)}.
 */
public void deleteLinks(RecordId sourceRecord) throws LinkIndexException, InterruptedException {
    final AbsoluteRecordId absoluteSource = getAbsoluteId(sourceRecord);
    deleteLinks(absoluteSource);
}
/**
 * Deletes all links of a record, irrespective of the vtag.
 *
 * <p>The links to delete are looked up in the forward index. The matching backward entries
 * are removed first, the forward entries afterwards.
 *
 * @param sourceRecord the record whose outgoing links are removed
 * @throws LinkIndexException if reading or removing index entries fails
 */
public void deleteLinks(AbsoluteRecordId sourceRecord) throws LinkIndexException, InterruptedException {
    final long startTime = System.currentTimeMillis();
    try {
        final byte[] sourceBytes = sourceRecord.toBytes();

        // The forward table tells us which entries exist for this record
        final Set<Pair<FieldedLink, SchemaId>> existingLinks = getAllForwardLinks(sourceRecord);

        // Step 1: remove the entries from the backward table
        final List<IndexEntry> backwardEntries = new ArrayList<IndexEntry>(existingLinks.size());
        for (Pair<FieldedLink, SchemaId> linkAndVtag : existingLinks) {
            final SchemaId vtag = linkAndVtag.getV2();
            final FieldedLink link = linkAndVtag.getV1();
            final IndexEntry backwardEntry =
                    createBackwardIndexEntry(vtag, link.getAbsoluteRecordId(), link.getFieldTypeId());
            backwardEntry.setIdentifier(sourceBytes);
            backwardEntries.add(backwardEntry);
        }
        backwardIndex.removeEntries(backwardEntries);

        // Step 2: remove the entries from the forward table
        final List<IndexEntry> forwardEntries = new ArrayList<IndexEntry>(existingLinks.size());
        for (Pair<FieldedLink, SchemaId> linkAndVtag : existingLinks) {
            final SchemaId vtag = linkAndVtag.getV2();
            final FieldedLink link = linkAndVtag.getV1();
            final IndexEntry forwardEntry = createForwardIndexEntry(vtag, sourceRecord, link.getFieldTypeId());
            forwardEntry.setIdentifier(link.getAbsoluteRecordId().toBytes());
            forwardEntries.add(forwardEntry);
        }
        forwardIndex.removeEntries(forwardEntries);
    } catch (LinkIndexException e) {
        throw new LinkIndexException("Error deleting links for record '" + sourceRecord + "'", e);
    } catch (IOException e) {
        throw new LinkIndexException("Error deleting links for record '" + sourceRecord + "'", e);
    } finally {
        final long elapsed = System.currentTimeMillis() - startTime;
        metrics.report(Action.DELETE_LINKS, elapsed);
    }
}
/**
 * Deletes the links of a record for a single vtag.
 *
 * <p>The record id is converted to an absolute record id before delegating to
 * {@link #deleteLinks(AbsoluteRecordId, SchemaId)}.
 */
public void deleteLinks(RecordId sourceRecord, SchemaId vtag) throws LinkIndexException, InterruptedException {
    final AbsoluteRecordId absoluteSource = getAbsoluteId(sourceRecord);
    deleteLinks(absoluteSource, vtag);
}
/**
 * Deletes the links of a record for a single vtag.
 *
 * <p>The links to delete are looked up in the forward index. The matching backward entries
 * are removed first, the forward entries afterwards.
 *
 * @param sourceRecord the record whose outgoing links are removed
 * @param vtag         the vtag whose links are removed
 * @throws LinkIndexException if reading or removing index entries fails
 */
public void deleteLinks(AbsoluteRecordId sourceRecord, SchemaId vtag) throws LinkIndexException, InterruptedException {
    final long startTime = System.currentTimeMillis();
    try {
        final byte[] sourceBytes = sourceRecord.toBytes();

        // The forward table tells us which entries exist for this record and vtag
        final Set<FieldedLink> existingLinks = getFieldedForwardLinks(sourceRecord, vtag);

        // Step 1: remove the entries from the backward table
        final List<IndexEntry> backwardEntries = new ArrayList<IndexEntry>(existingLinks.size());
        for (FieldedLink existing : existingLinks) {
            final IndexEntry backwardEntry =
                    createBackwardIndexEntry(vtag, existing.getAbsoluteRecordId(), existing.getFieldTypeId());
            backwardEntry.setIdentifier(sourceBytes);
            backwardEntries.add(backwardEntry);
        }
        backwardIndex.removeEntries(backwardEntries);

        // Step 2: remove the entries from the forward table
        final List<IndexEntry> forwardEntries = new ArrayList<IndexEntry>(existingLinks.size());
        for (FieldedLink existing : existingLinks) {
            final IndexEntry forwardEntry = createForwardIndexEntry(vtag, sourceRecord, existing.getFieldTypeId());
            forwardEntry.setIdentifier(existing.getAbsoluteRecordId().toBytes());
            forwardEntries.add(forwardEntry);
        }
        forwardIndex.removeEntries(forwardEntries);
    } catch (LinkIndexException e) {
        throw new LinkIndexException("Error deleting links for record '" + sourceRecord + "', vtag '" + vtag + "'",
                e);
    } catch (IOException e) {
        throw new LinkIndexException("Error deleting links for record '" + sourceRecord + "', vtag '" + vtag + "'",
                e);
    } finally {
        final long elapsed = System.currentTimeMillis() - startTime;
        metrics.report(Action.DELETE_LINKS_VTAG, elapsed);
    }
}
/**
 * Updates the links of a record for the given vtag, treating the record as an existing one
 * (that is, with {@code isNewRecord} set to {@code false}).
 */
public void updateLinks(RecordId sourceRecord, SchemaId vtag, Set<FieldedLink> links)
        throws LinkIndexException, InterruptedException {
    final boolean isNewRecord = false;
    updateLinks(sourceRecord, vtag, links, isNewRecord);
}
/**
 * Updates the links of a record for the given vtag, treating the record as an existing one
 * (that is, with {@code isNewRecord} set to {@code false}).
 */
public void updateLinks(AbsoluteRecordId sourceRecord, SchemaId vtag, Set<FieldedLink> links)
        throws LinkIndexException, InterruptedException {
    final boolean isNewRecord = false;
    updateLinks(sourceRecord, vtag, links, isNewRecord);
}
/**
 * Updates the links of a record for the given vtag. The record id is converted to an absolute
 * record id before delegating to the {@link AbsoluteRecordId} based variant.
 *
 * @param links       if this set is empty, then calling this method is equivalent to calling deleteLinks
 * @param isNewRecord if this is a new record, then we can skip querying the existing links, thus gaining
 *                    some time.
 */
public void updateLinks(RecordId sourceRecord, SchemaId vtag, Set<FieldedLink> links, boolean isNewRecord)
        throws LinkIndexException, InterruptedException {
    final AbsoluteRecordId absoluteSource = getAbsoluteId(sourceRecord);
    updateLinks(absoluteSource, vtag, links, isNewRecord);
}
/**
 * Brings the links stored for a record and vtag in line with the given set of links.
 *
 * <p>Only the difference between the currently stored links and {@code links} is written:
 * links that are new are added, links that disappeared are removed, unchanged links are left
 * untouched.
 *
 * @param sourceRecord the record the links originate from
 * @param vtag         the vtag for which the links are updated
 * @param links        the complete set of links the record should have; if this set is empty, then
 *                     calling this method is equivalent to calling deleteLinks
 * @param isNewRecord  if this is a new record, then we can skip querying the existing links, thus
 *                     gaining some time
 * @throws LinkIndexException if reading the existing links or writing index entries fails
 */
public void updateLinks(AbsoluteRecordId sourceRecord, SchemaId vtag, Set<FieldedLink> links, boolean isNewRecord)
        throws LinkIndexException, InterruptedException {
    long before = System.currentTimeMillis();
    try {
        // We could simply delete all the old entries using deleteLinks() and then add
        // all new entries, but instead we find out what actually needs adding or removing and only
        // perform that. This is to avoid running into problems due to http://search-hadoop.com/m/rNnhN15Xecu
        // (= delete and put within the same millisecond).
        Set<FieldedLink> oldLinks = isNewRecord ?
                Collections.<FieldedLink>emptySet() : getFieldedForwardLinks(sourceRecord, vtag);

        if (links.isEmpty() && oldLinks.isEmpty()) {
            // No links to add, no links to remove
            return;
        }

        // Find out what changed
        Set<FieldedLink> removedLinks = new HashSet<FieldedLink>(oldLinks);
        removedLinks.removeAll(links);

        Set<FieldedLink> addedLinks = new HashSet<FieldedLink>(links);
        addedLinks.removeAll(oldLinks);

        byte[] sourceAsBytes = sourceRecord.toBytes();

        // Apply added links: forward table first, backward table second
        if (!addedLinks.isEmpty()) {
            // Both lists receive exactly one entry per added link
            int addedCount = addedLinks.size();
            List<IndexEntry> fwdEntries = new ArrayList<IndexEntry>(addedCount);
            List<IndexEntry> bkwdEntries = new ArrayList<IndexEntry>(addedCount);
            for (FieldedLink link : addedLinks) {
                IndexEntry fwdEntry = createForwardIndexEntry(vtag, sourceRecord, link.getFieldTypeId());
                fwdEntry.setIdentifier(link.getAbsoluteRecordId().toBytes());
                fwdEntries.add(fwdEntry);

                IndexEntry bkwdEntry = createBackwardIndexEntry(vtag, link.getAbsoluteRecordId(), link.getFieldTypeId());
                bkwdEntry.setIdentifier(sourceAsBytes);
                bkwdEntries.add(bkwdEntry);
            }
            forwardIndex.addEntries(fwdEntries);
            backwardIndex.addEntries(bkwdEntries);
        }

        // Apply removed links: backward table first, forward table second
        if (!removedLinks.isEmpty()) {
            // Both lists receive exactly one entry per removed link
            int removedCount = removedLinks.size();
            List<IndexEntry> bkwdEntries = new ArrayList<IndexEntry>(removedCount);
            List<IndexEntry> fwdEntries = new ArrayList<IndexEntry>(removedCount);
            for (FieldedLink link : removedLinks) {
                IndexEntry bkwdEntry = createBackwardIndexEntry(vtag, link.getAbsoluteRecordId(), link.getFieldTypeId());
                bkwdEntry.setIdentifier(sourceAsBytes);
                bkwdEntries.add(bkwdEntry);

                IndexEntry fwdEntry = createForwardIndexEntry(vtag, sourceRecord, link.getFieldTypeId());
                fwdEntry.setIdentifier(link.getAbsoluteRecordId().toBytes());
                fwdEntries.add(fwdEntry);
            }
            backwardIndex.removeEntries(bkwdEntries);
            forwardIndex.removeEntries(fwdEntries);
        }
    } catch (IOException e) {
        throw new LinkIndexException("Error updating links for record '" + sourceRecord + "', vtag '" +
                vtag + "'", e);
    } finally {
        metrics.report(Action.UPDATE_LINKS, System.currentTimeMillis() - before);
    }
}
/**
 * Builds an entry for the backward index. The identifier of the entry is not set here;
 * callers set it themselves.
 *
 * @param vtag        value of the "vtag" index field
 * @param target      record whose bytes become the "target" index field
 * @param sourceField value of the "sourcefield" index field, also stored as entry data
 */
private IndexEntry createBackwardIndexEntry(SchemaId vtag, AbsoluteRecordId target, SchemaId sourceField) {
    final IndexEntry backwardEntry = new IndexEntry(backwardIndex.getDefinition());

    // Indexed fields
    final byte[] vtagBytes = vtag.getBytes();
    backwardEntry.addField("vtag", vtagBytes);
    final byte[] targetBytes = target.toBytes();
    backwardEntry.addField("target", targetBytes);
    final byte[] sourceFieldBytes = sourceField.getBytes();
    backwardEntry.addField("sourcefield", sourceFieldBytes);

    // Data stored along with the entry, so it can be read back from query results
    backwardEntry.addData(SOURCE_FIELD_KEY, sourceField.getBytes());

    return backwardEntry;
}
/**
 * Builds an entry for the forward index. The identifier of the entry is not set here;
 * callers set it themselves.
 *
 * @param vtag        value of the "vtag" index field, also stored as entry data
 * @param source      record whose bytes become the "source" index field
 * @param sourceField value of the "sourcefield" index field, also stored as entry data
 */
private IndexEntry createForwardIndexEntry(SchemaId vtag, AbsoluteRecordId source, SchemaId sourceField) {
    final IndexEntry forwardEntry = new IndexEntry(forwardIndex.getDefinition());

    // Indexed fields
    final byte[] vtagBytes = vtag.getBytes();
    forwardEntry.addField("vtag", vtagBytes);
    final byte[] sourceBytes = source.toBytes();
    forwardEntry.addField("source", sourceBytes);
    final byte[] sourceFieldBytes = sourceField.getBytes();
    forwardEntry.addField("sourcefield", sourceFieldBytes);

    // Data stored along with the entry, so it can be read back from query results
    forwardEntry.addData(SOURCE_FIELD_KEY, sourceField.getBytes());
    forwardEntry.addData(VTAG_KEY, vtag.getBytes());

    return forwardEntry;
}
/**
 * Returns the records that link to the given record, without restricting on the field
 * that holds the link (a {@code null} source field is passed on).
 */
public Set<RecordId> getReferrers(RecordId record, SchemaId vtag) throws LinkIndexException, InterruptedException {
    final SchemaId anySourceField = null;
    return getReferrers(record, vtag, anySourceField);
}
/**
 * Returns the absolute ids of the records that link to the given record, without restricting
 * on the field that holds the link (a {@code null} source field is passed on).
 */
public Set<AbsoluteRecordId> getAbsoluteReferrers(AbsoluteRecordId record, SchemaId vtag) throws LinkIndexException, InterruptedException {
    final SchemaId anySourceField = null;
    return getAbsoluteReferrers(record, vtag, anySourceField);
}
/**
 * Returns the records that link to the given record. The record id is converted to an
 * absolute record id before delegating to the {@link AbsoluteRecordId} based variant.
 */
public Set<RecordId> getReferrers(RecordId record, SchemaId vtag, SchemaId sourceField)
        throws LinkIndexException, InterruptedException {
    final AbsoluteRecordId absoluteRecord = getAbsoluteId(record);
    return getReferrers(absoluteRecord, vtag, sourceField);
}
/**
 * Returns the records that link to the given record, as plain record ids.
 *
 * <p>The referrers are retrieved as absolute record ids and then reduced to their
 * {@link RecordId} part.
 */
public Set<RecordId> getReferrers(AbsoluteRecordId record, SchemaId vtag, SchemaId sourceField)
        throws LinkIndexException, InterruptedException {
    final Set<AbsoluteRecordId> absoluteIds = getAbsoluteReferrers(record, vtag, sourceField);

    final Set<RecordId> recordIds = Sets.newHashSetWithExpectedSize(absoluteIds.size());
    for (AbsoluteRecordId absoluteId : absoluteIds) {
        final RecordId plainId = absoluteId.getRecordId();
        recordIds.add(plainId);
    }
    return recordIds;
}
/**
 * Queries the backward index for the records that link to the given record.
 *
 * @param record      the link target
 * @param vtag        if not null, only links stored under this vtag are considered
 * @param sourceField if not null, only links held by this field are considered
 * @return the absolute ids of the referring records
 * @throws LinkIndexException if querying the index fails
 */
public Set<AbsoluteRecordId> getAbsoluteReferrers(AbsoluteRecordId record, SchemaId vtag, SchemaId sourceField)
        throws LinkIndexException, InterruptedException {
    final long startTime = System.currentTimeMillis();
    try {
        final Query backwardQuery = new Query();
        backwardQuery.addEqualsCondition("target", record.toBytes());
        if (vtag != null) {
            backwardQuery.addEqualsCondition("vtag", vtag.getBytes());
        }
        if (sourceField != null) {
            backwardQuery.addEqualsCondition("sourcefield", sourceField.getBytes());
        }

        final Set<AbsoluteRecordId> referrers = Sets.newHashSet();
        final QueryResult matches = backwardIndex.performQuery(backwardQuery);
        for (byte[] referrerBytes = matches.next(); referrerBytes != null; referrerBytes = matches.next()) {
            referrers.add(getIdGenerator().absoluteFromBytes(referrerBytes));
        }
        // Not closed in finally block: avoid HBase contact when there could be connection problems.
        Closer.close(matches);
        return referrers;
    } catch (IOException e) {
        throw new LinkIndexException("Error getting referrers for record '" + record + "', vtag '" + vtag +
                "', field '" + sourceField + "'", e);
    } finally {
        final long elapsed = System.currentTimeMillis() - startTime;
        metrics.report(Action.GET_REFERRERS, elapsed);
    }
}
/**
 * Queries the backward index for the records that link to the given record, together with
 * the field in which each link occurs.
 *
 * @param record the link target
 * @param vtag   if not null, only links stored under this vtag are considered
 * @return one {@link FieldedLink} per referrer, holding the referrer's absolute id and the id of
 *         the field that contains the link
 * @throws LinkIndexException if resolving the absolute record id or querying the index fails
 */
public Set<FieldedLink> getFieldedReferrers(RecordId record, SchemaId vtag)
        throws LinkIndexException, InterruptedException {
    long before = System.currentTimeMillis();
    try {
        // Backward index entries are created with the bytes of an AbsoluteRecordId as "target"
        // (see createBackwardIndexEntry), so the plain record id has to be converted to its
        // absolute form before it can be used as query value.
        AbsoluteRecordId absoluteRecord = getAbsoluteId(record);

        Query query = new Query();
        query.addEqualsCondition("target", absoluteRecord.toBytes());
        if (vtag != null) {
            query.addEqualsCondition("vtag", vtag.getBytes());
        }

        Set<FieldedLink> result = new HashSet<FieldedLink>();
        QueryResult qr = backwardIndex.performQuery(query);
        byte[] id;
        while ((id = qr.next()) != null) {
            SchemaId sourceField = getIdGenerator().getSchemaId(qr.getData(SOURCE_FIELD_KEY));
            result.add(new FieldedLink(getIdGenerator().absoluteFromBytes(id), sourceField));
        }
        // Not closed in finally block: avoid HBase contact when there could be connection problems.
        Closer.close(qr);
        return result;
    } catch (IOException e) {
        throw new LinkIndexException("Error getting referrers for record '" + record + "', vtag '" + vtag + "'", e);
    } finally {
        metrics.report(Action.GET_FIELDED_REFERRERS, System.currentTimeMillis() - before);
    }
}
/**
 * Returns all outgoing links of a record, across all vtags. The record id is converted to an
 * absolute record id before delegating to {@link #getAllForwardLinks(AbsoluteRecordId)}.
 */
public Set<Pair<FieldedLink, SchemaId>> getAllForwardLinks(RecordId record)
        throws LinkIndexException, InterruptedException {
    final AbsoluteRecordId absoluteRecord = getAbsoluteId(record);
    return getAllForwardLinks(absoluteRecord);
}
/**
 * Queries the forward index for all outgoing links of a record, across all vtags.
 *
 * @param record the link source
 * @return pairs of (link, vtag): the link holds the target's absolute id and the id of the field
 *         that contains the link, the vtag is the one read back from the index entry's data
 * @throws LinkIndexException if querying the index fails
 */
public Set<Pair<FieldedLink, SchemaId>> getAllForwardLinks(AbsoluteRecordId record)
        throws LinkIndexException, InterruptedException {
    final long startTime = System.currentTimeMillis();
    try {
        final Query forwardQuery = new Query();
        forwardQuery.addEqualsCondition("source", record.toBytes());

        final Set<Pair<FieldedLink, SchemaId>> linksWithVtag = new HashSet<Pair<FieldedLink, SchemaId>>();
        final QueryResult matches = forwardIndex.performQuery(forwardQuery);
        for (byte[] targetBytes = matches.next(); targetBytes != null; targetBytes = matches.next()) {
            final SchemaId sourceField = getIdGenerator().getSchemaId(matches.getData(SOURCE_FIELD_KEY));
            final SchemaId vtag = getIdGenerator().getSchemaId(matches.getData(VTAG_KEY));
            final FieldedLink link = new FieldedLink(getIdGenerator().absoluteFromBytes(targetBytes), sourceField);
            linksWithVtag.add(new Pair<FieldedLink, SchemaId>(link, vtag));
        }
        // Not closed in finally block: avoid HBase contact when there could be connection problems.
        Closer.close(matches);
        return linksWithVtag;
    } catch (IOException e) {
        throw new LinkIndexException("Error getting forward links for record '" + record + "'", e);
    } finally {
        final long elapsed = System.currentTimeMillis() - startTime;
        metrics.report(Action.GET_ALL_FW_LINKS, elapsed);
    }
}
/**
 * Returns the records the given record links to, without restricting on the field that
 * holds the link (a {@code null} source field is passed on).
 */
public Set<RecordId> getForwardLinks(RecordId record, SchemaId vtag)
        throws LinkIndexException, InterruptedException {
    final SchemaId anySourceField = null;
    return getForwardLinks(record, vtag, anySourceField);
}
/**
 * Returns the records the given record links to, as plain record ids.
 *
 * <p>The record id is converted to an absolute record id for the lookup; the resulting
 * absolute ids are then reduced to their {@link RecordId} part.
 */
public Set<RecordId> getForwardLinks(RecordId record, SchemaId vtag, SchemaId sourceField)
        throws LinkIndexException, InterruptedException {
    final AbsoluteRecordId absoluteRecord = getAbsoluteId(record);
    final Set<AbsoluteRecordId> absoluteTargets = getForwardLinks(absoluteRecord, vtag, sourceField);

    final Set<RecordId> targets = Sets.newHashSetWithExpectedSize(absoluteTargets.size());
    for (AbsoluteRecordId absoluteTarget : absoluteTargets) {
        final RecordId plainId = absoluteTarget.getRecordId();
        targets.add(plainId);
    }
    return targets;
}
/**
 * Queries the forward index for the records the given record links to.
 *
 * @param record      the link source
 * @param vtag        if not null, only links stored under this vtag are considered
 * @param sourceField if not null, only links held by this field are considered
 * @return the absolute ids of the link targets
 * @throws LinkIndexException if querying the index fails
 */
public Set<AbsoluteRecordId> getForwardLinks(AbsoluteRecordId record, SchemaId vtag, SchemaId sourceField)
        throws LinkIndexException, InterruptedException {
    final long startTime = System.currentTimeMillis();
    try {
        final Query forwardQuery = new Query();
        forwardQuery.addEqualsCondition("source", record.toBytes());
        if (vtag != null) {
            forwardQuery.addEqualsCondition("vtag", vtag.getBytes());
        }
        if (sourceField != null) {
            forwardQuery.addEqualsCondition("sourcefield", sourceField.getBytes());
        }

        final Set<AbsoluteRecordId> targets = new HashSet<AbsoluteRecordId>();
        final QueryResult matches = forwardIndex.performQuery(forwardQuery);
        for (byte[] targetBytes = matches.next(); targetBytes != null; targetBytes = matches.next()) {
            targets.add(getIdGenerator().absoluteFromBytes(targetBytes));
        }
        // Not closed in finally block: avoid HBase contact when there could be connection problems.
        Closer.close(matches);
        return targets;
    } catch (IOException e) {
        throw new LinkIndexException("Error getting forward links for record '" + record + "', vtag '" +
                vtag + "', field '" + sourceField + "'", e);
    } finally {
        final long elapsed = System.currentTimeMillis() - startTime;
        metrics.report(Action.GET_FW_LINKS, elapsed);
    }
}
/**
 * Returns the outgoing links of a record together with the field in which each link occurs.
 * The record id is converted to an absolute record id before delegating to the
 * {@link AbsoluteRecordId} based variant.
 */
public Set<FieldedLink> getFieldedForwardLinks(RecordId record, SchemaId vtag)
        throws LinkIndexException, InterruptedException {
    final AbsoluteRecordId absoluteRecord = getAbsoluteId(record);
    return getFieldedForwardLinks(absoluteRecord, vtag);
}
/**
 * Queries the forward index for the outgoing links of a record, together with the field in
 * which each link occurs.
 *
 * @param record the link source
 * @param vtag   if not null, only links stored under this vtag are considered
 * @return one {@link FieldedLink} per link, holding the target's absolute id and the id of the
 *         field that contains the link
 * @throws LinkIndexException if querying the index fails
 */
public Set<FieldedLink> getFieldedForwardLinks(AbsoluteRecordId record, SchemaId vtag)
        throws LinkIndexException, InterruptedException {
    final long startTime = System.currentTimeMillis();
    try {
        final Query forwardQuery = new Query();
        forwardQuery.addEqualsCondition("source", record.toBytes());
        if (vtag != null) {
            forwardQuery.addEqualsCondition("vtag", vtag.getBytes());
        }

        final Set<FieldedLink> fieldedLinks = new HashSet<FieldedLink>();
        final QueryResult matches = forwardIndex.performQuery(forwardQuery);
        for (byte[] targetBytes = matches.next(); targetBytes != null; targetBytes = matches.next()) {
            final SchemaId sourceField = getIdGenerator().getSchemaId(matches.getData(SOURCE_FIELD_KEY));
            final AbsoluteRecordId target = getIdGenerator().absoluteFromBytes(targetBytes);
            fieldedLinks.add(new FieldedLink(target, sourceField));
        }
        // Not closed in finally block: avoid HBase contact when there could be connection problems.
        Closer.close(matches);
        return fieldedLinks;
    } catch (IOException e) {
        throw new LinkIndexException("Error getting forward links for record '" + record + "', vtag '" +
                vtag + "'", e);
    } finally {
        // NOTE(review): reported under GET_FW_LINKS, the same action getForwardLinks uses, so both
        // operations end up in one metric - confirm whether a dedicated action was intended.
        final long elapsed = System.currentTimeMillis() - startTime;
        metrics.report(Action.GET_FW_LINKS, elapsed);
    }
}
/**
 * Returns the id generator, retrieving it from the default repository on first use.
 *
 * <p>The generator cannot be assigned in the constructor since the repository is a premature
 * one at that point. No synchronization is applied: it does not matter if the retrieval
 * happens twice.
 *
 * @throws LinkIndexException wrapping any {@link RepositoryException} raised during retrieval
 */
private IdGenerator getIdGenerator() throws InterruptedException, LinkIndexException {
    if (lazyIdGenerator != null) {
        return lazyIdGenerator;
    }

    try {
        lazyIdGenerator = repositoryManager.getDefaultRepository().getIdGenerator();
    } catch (RepositoryException e) {
        throw new LinkIndexException(e);
    }
    return lazyIdGenerator;
}
/**
 * Converts a record id into an absolute record id, using the name of {@code Table.RECORD}
 * as table.
 */
private AbsoluteRecordId getAbsoluteId(RecordId recordId) throws LinkIndexException, InterruptedException {
    final IdGenerator idGenerator = getIdGenerator();
    return idGenerator.newAbsoluteRecordId(Table.RECORD.name, recordId);
}
}