/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.commons.lang.NotImplementedException;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.StrField;
import org.apache.solr.schema.TextField;
import org.apache.solr.schema.TrieIntField;
import org.apache.solr.uninverting.UninvertingReader;
import org.apache.solr.uninverting.UninvertingReader.Type;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Implementation of a cache for second-order operations. This cache
 * first constructs a mapping from identifiers to lucene docids.
 * Next, it reads all values from a document field and builds an
 * in-memory data structure that can be used to tell which documents
 * are related.
 *
 * For the time being, we read the whole index into memory to create
 * the citation network, but this implementation should also be
 * capable of holding only a partial (most accessed) citation
 * network in memory. However, the initial mapping (value <-> lucene id)
 * will always be constructed in its entirety.
*
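 * The cache is declared in solrconfig.xml like any other Solr cache.
 * A minimal sketch - the parameter names are the ones read in
 * {@link #init}, while the field names and sizes are illustrative:
 *
 * <pre>
 * &lt;cache name="citations-cache"
 *        class="solr.CitationLRUCache"
 *        size="1024"
 *        initialSize="1024"
 *        autowarmCount="100%"
 *        identifierFields="bibcode,alternate_bibcode"
 *        citationFields="citation"
 *        referenceFields="reference" /&gt;
 * </pre>
 *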
*/
public class CitationLRUCache<K,V> extends SolrCacheBase implements SolrCache<K,V> {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
/* An instance of this class will be shared across multiple instances
* of an LRUCache at the same time. Make sure everything is thread safe.
*/
private static class CumulativeStats {
AtomicLong lookups = new AtomicLong();
AtomicLong hits = new AtomicLong();
AtomicLong inserts = new AtomicLong();
AtomicLong evictions = new AtomicLong();
}
private CumulativeStats stats;
  // per instance stats. The synchronization used for the map will also be
  // used for updating these statistics (and hence they are not AtomicLongs)
private long lookups;
private long hits;
private long inserts;
private long evictions;
private long warmupTime = 0;
private String description="Citation LRU Cache";
// the main objects
private LinkedHashMap<K,V> relationships;
private String[] referenceFields;
private String[] citationFields;
private String[] identifierFields = null;
private int sourceReaderHashCode = 0;
// If we detect that you are mixing int and text fields
// we'll treat all values (mappings) as text values
private boolean treatIdentifiersAsText = false;
  // TODO: I'm planning to add the ability to build the cache
  // incrementally (i.e. per index segment), but it may
  // not be necessary as we are going to denormalize
  // citation data outside solr and prepare everything there...
private boolean incremental = false;
private boolean reuseCache;
@SuppressWarnings({ "unchecked", "rawtypes" })
public Object init(Map args, Object persistence, CacheRegenerator regenerator) {
super.init(args, regenerator);
    String idFields = (String) args.get("identifierFields");
    assert idFields != null && idFields.trim().length() > 0;
    identifierFields = idFields.split(",");
incremental = "true".equals(((String)args.get("incremental")));
reuseCache = "true".equals(((String)args.get("reuseCache")));
citationFields = new String[0];
referenceFields = new String[0];
if (args.containsKey("referenceFields") && ((String)args.get("referenceFields")).trim().length() > 0) {
referenceFields = ((String)args.get("referenceFields")).split(",");
}
if (args.containsKey("citationFields") && ((String)args.get("citationFields")).trim().length() > 0) {
citationFields = ((String)args.get("citationFields")).split(",");
}
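    // "size" may end with '%'; note that the numeric part (with the '%'
    // stripped) is also reused as the absolute limit below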
Float sizeInPercent = null;
String str = (String)args.get("size");
if (str != null && str.endsWith("%")) {
str = str.substring(0, str.length()-1);
sizeInPercent = Integer.parseInt(str) / 100f;
}
final int limit = str==null ? 1024 : Integer.parseInt(str);
str = (String)args.get("initialSize");
final int initialSize = Math.min(str==null ? 1024 : Integer.parseInt(str), limit);
description = generateDescription(limit, initialSize);
relationships = new RelationshipLinkedHashMap<K,V>(initialSize, 0.75f, true,
limit, sizeInPercent);
if (persistence==null) {
// must be the first time a cache of this type is being created
persistence = new CumulativeStats();
}
stats = (CumulativeStats)persistence;
return persistence;
}
  /**
   * @return the description of this cache
   */
private String generateDescription(int limit, int initialSize) {
String description = "CitationLRU Cache(maxSize=" + limit + ", initialSize=" + initialSize;
if (isAutowarmingOn()) {
description += ", " + getAutowarmDescription();
}
description += ')';
return description;
}
public int size() {
synchronized(relationships) {
return relationships.size();
}
}
public boolean treatsIdentifiersAsText() {
return treatIdentifiersAsText;
}
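  /*
   * Stores the mapping identifier -> lucene docid
   */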
public V put(K key, V value) {
//System.out.println("put(" + key + "," + value+")");
synchronized (relationships) {
if (getState() == State.LIVE) {
stats.inserts.incrementAndGet();
}
// increment local inserts regardless of state???
// it does make it more consistent with the current size...
inserts++;
return relationships.put(key,value);
}
}
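  /*
   * Returns the lucene docid stored for the given identifier, or null
   */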
public V get(K key) {
synchronized (relationships) {
V val = relationships.get(key);
if (getState() == State.LIVE) {
// only increment lookups and hits if we are live.
lookups++;
stats.lookups.incrementAndGet();
if (val!=null) {
hits++;
stats.hits.incrementAndGet();
}
}
return val;
}
}
  /*
   * This method should be used only for the very specific purpose of
   * dumping the citation cache (or accessing all elements of
   * the cache). Access to the map is not synchronized, but you
   * are iterating over a copy of the data - so you cannot change it.
   *
   * Each element holds references first, citations second.
   */
public Iterator<int[][]> getCitationsIterator() {
return ((RelationshipLinkedHashMap<K,V>) relationships).getRelationshipsIterator();
}
public int getCitationsIteratorSize() {
synchronized (relationships) {
return ((RelationshipLinkedHashMap<K,V>) relationships).relationshipsDataSize();
}
}
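  /*
   * Returns lucene docids of the documents that cite the document
   * identified by key, or null if the key is unknown
   */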
public int[] getCitations(K key) {
synchronized (relationships) {
V val = relationships.get(key);
if (val==null)
return null;
RelationshipLinkedHashMap<K,V> relMap = (RelationshipLinkedHashMap<K,V>) relationships;
int[] values = relMap.getCitations((Integer)val);
if (getState() == State.LIVE) {
// only increment lookups and hits if we are live.
lookups++;
stats.lookups.incrementAndGet();
if (values!=null) {
hits++;
stats.hits.incrementAndGet();
}
}
return values;
}
}
  /*
   * Helper method to retrieve citations directly by lucene docid.
   */
public int[] getCitations(int docid) {
synchronized (relationships) {
RelationshipLinkedHashMap<K,V> relMap = (RelationshipLinkedHashMap<K,V>) relationships;
int[] val = relMap.getCitations(docid);
if (getState() == State.LIVE) {
// only increment lookups and hits if we are live.
lookups++;
stats.lookups.incrementAndGet();
if (val!=null) {
hits++;
stats.hits.incrementAndGet();
}
}
return val;
}
}
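  /*
   * Returns lucene docids of the documents referenced by the document
   * identified by key, or null if the key is unknown
   */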
public int[] getReferences(K key) {
synchronized (relationships) {
V val = relationships.get(key);
if (val==null)
return null;
RelationshipLinkedHashMap<K,V> relMap = (RelationshipLinkedHashMap<K,V>) relationships;
int[] values = relMap.getReferences((Integer)val);
if (getState() == State.LIVE) {
// only increment lookups and hits if we are live.
lookups++;
stats.lookups.incrementAndGet();
if (values!=null) {
hits++;
stats.hits.incrementAndGet();
}
}
return values;
}
}
  /*
   * Helper method to retrieve references directly by lucene docid.
   */
public int[] getReferences(int docid) {
synchronized (relationships) {
RelationshipLinkedHashMap<K,V> relMap = (RelationshipLinkedHashMap<K,V>) relationships;
int[] val = relMap.getReferences(docid);
if (getState() == State.LIVE) {
// only increment lookups and hits if we are live.
lookups++;
stats.lookups.incrementAndGet();
if (val!=null) {
hits++;
stats.hits.incrementAndGet();
}
}
return val;
}
}
public void clear() {
synchronized(relationships) {
relationships.clear();
}
}
private boolean isWarming = false;
private boolean purgeCache;
public boolean isWarmingOrWarmed() {
return isWarming;
}
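  /*
   * Populates the cache from the new searcher; depending on configuration
   * this either rebuilds everything from scratch or (experimentally)
   * refreshes only the changed documents
   */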
public void warm(SolrIndexSearcher searcher, SolrCache<K,V> old) {
long warmingStartTime = System.nanoTime();
if (isAutowarmingOn()) {
isWarming = true;
try {
log.info("Warming cache (" + name() + "): " + searcher);
if (this.incremental ) {
warmIncrementally(searcher, old);
}
else {
warmRebuildEverything(searcher, old);
}
log.info("Warming cache done (# entries:" + relationships.size() + "): " + searcher);
}
catch (IOException e) {
throw new SolrException(ErrorCode.SERVER_ERROR, "Failed to generate initial IDMapping", e);
}
sourceReaderHashCode = searcher.hashCode();
}
warmupTime = TimeUnit.MILLISECONDS.convert(System.nanoTime() - warmingStartTime, TimeUnit.NANOSECONDS);
}
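  /*
   * Rebuilds the whole cache: first the identifier -> docid mapping,
   * then the reference/citation links; if only one side of the links
   * is indexed, the other side is inferred
   */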
private void warmRebuildEverything(SolrIndexSearcher searcher, SolrCache<K,V> old) throws IOException {
List<String> fields = getFields(searcher, this.identifierFields);
//if (this.referenceFields.length == 0 && this.citationFields.length == 0) {
// return;
//}
    // builds the mapping from document IDs to lucene docids
unInvertedTheDamnThing(searcher, fields,
new KVSetter() {
@Override
@SuppressWarnings({ "unchecked" })
public void set (int docbase, int docid, Object value) {
if (treatIdentifiersAsText && value instanceof Integer) {
value = Integer.toString((Integer) value);
}
put((K) value, (V) (Integer) (docbase+docid));
}
}
);
if (this.referenceFields.length > 0 || this.citationFields.length > 0) {
@SuppressWarnings("rawtypes")
final RelationshipLinkedHashMap relMap = (RelationshipLinkedHashMap) relationships;
relMap.initializeCitationCache(searcher.maxDoc()); // TODO: touch only updated fields
unInvertedTheDamnThing(searcher, getFields(searcher, this.referenceFields),
new KVSetter() {
@Override
public void set (int docbase, int docid, Object value) {
synchronized (relMap) {
relMap.addReference(docbase+docid, value);
}
}
}
);
unInvertedTheDamnThing(searcher, getFields(searcher, this.citationFields),
new KVSetter() {
@Override
public void set (int docbase, int docid, Object value) {
synchronized (relMap) {
relMap.addCitation(docbase+docid, value);
}
}
}
);
if (this.citationFields.length == 0 && this.referenceFields.length > 0) {
relMap.inferCitationsFromReferences();
}
else if (this.citationFields.length > 0 && this.referenceFields.length == 0) {
relMap.inferReferencesFromCitations();
}
}
}
private void warmIncrementally(SolrIndexSearcher searcher, SolrCache<K,V> old) throws IOException {
if (regenerator==null) return;
List<String> fields = getFields(searcher, this.identifierFields);
CitationLRUCache<K,V> other = (CitationLRUCache<K,V>)old;
// collect ids of documents that need to be reloaded/regenerated during this
// warmup run
//System.out.println("searcher: " + searcher.toString());
//System.out.println("maxDoc: " + searcher.getIndexReader().maxDoc());
FixedBitSet toRefresh = new FixedBitSet(searcher.getIndexReader().maxDoc());
//System.out.println("version=" + searcher.getIndexReader().getVersion());
//try {
//System.out.println("commit=" + searcher.getIndexReader().getIndexCommit());
//} catch (IOException e2) {
// TODO Auto-generated catch block
//e2.printStackTrace();
//}
// for (IndexReaderContext c : searcher.getTopReaderContext().children()) {
// //System.out.println("context=" + c.reader().getCombinedCoreAndDeletesKey());
// }
// for (IndexReaderContext l : searcher.getIndexReader().leaves()) {
// //System.out.println(l);
// }
Bits liveDocs = searcher.getSlowAtomicReader().getLiveDocs();
if (liveDocs == null) { // everything is new, this could be fresh index or merged/optimized index too
toRefresh.set(0, toRefresh.length());
// Build the mapping from indexed values into lucene ids
// this must always be available, so we build it no matter what...
// XXX: make it update only the necessary IDs (not the whole index)
unInvertedTheDamnThing(searcher, fields, new KVSetter() {
@SuppressWarnings("unchecked")
@Override
public void set (int docbase, int docid, Object value) {
put((K) value, (V) (Integer) (docbase+docid));
}
}
);
}
    else {
      Integer luceneId;
      for (V v: other.relationships.values()) {
        luceneId = ((Integer) v);
        if (luceneId < liveDocs.length() && !liveDocs.get(luceneId)) { // doc was either deleted or updated
          // TODO: retrieve all citations/references for this luceneId and mark those docs to be refreshed
        }
}
for (int i = 0; i < toRefresh.length(); i++) {
if (liveDocs.get(i)) {
toRefresh.set(i);
}
}
}
// warm entries
if (isAutowarmingOn()) {
Object[] keys,vals = null;
// Don't do the autowarming in the synchronized block, just pull out the keys and values.
synchronized (other.relationships) {
int sz = autowarm.getWarmCount(other.relationships.size());
keys = new Object[sz];
vals = new Object[sz];
Iterator<Map.Entry<K, V>> iter = other.relationships.entrySet().iterator();
// iteration goes from oldest (least recently used) to most recently used,
// so we need to skip over the oldest entries.
int skip = other.relationships.size() - sz;
for (int i=0; i<skip; i++) iter.next();
for (int i=0; i<sz; i++) {
Map.Entry<K,V> entry = iter.next();
keys[i]=entry.getKey();
vals[i]=entry.getValue();
}
}
// autowarm from the oldest to the newest entries so that the ordering will be
// correct in the new cache.
for (int i=0; i<keys.length; i++) {
try {
boolean continueRegen = true;
if (isModified(liveDocs, keys[i], vals[i])) {
            toRefresh.set((Integer) vals[i]); // vals hold the lucene docids; keys are external identifiers
}
else {
continueRegen = regenerator.regenerateItem(searcher, this, old, keys[i], vals[i]);
}
if (!continueRegen) break;
}
catch (Throwable e) {
SolrException.log(log,"Error during auto-warming of key:" + keys[i], e);
}
}
}
}
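  /*
   * Resolves and validates the configured field names against the schema;
   * a numeric field switches the cache into text mode
   */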
private List<String> getFields(SolrIndexSearcher searcher, String[] listOfFields) {
List<String> out = new ArrayList<String>();
IndexSchema schema = searcher.getCore().getLatestSchema();
if (schema.getUniqueKeyField() == null) {
throw new SolrException(ErrorCode.FORBIDDEN, "Sorry, your schema is missing unique key and thus you probably have many duplicates. I won't continue");
}
for (String f: listOfFields) {
String fName = f.replace(":sorted", "");
SchemaField fieldInfo = schema.getField(fName);
FieldType type = fieldInfo.getType();
if (type.getNumericType() != null) {
synchronized (relationships) {
treatIdentifiersAsText = true;
}
}
    if (!fieldInfo.stored() && !fieldInfo.hasDocValues()) {
throw new SolrException(ErrorCode.FORBIDDEN, "The field " + f + " cannot be used to build citation cache!");
}
out.add(fName);
}
return out;
}
  /*
   * Checks whether the cache needs to be rebuilt for this
   * document, e.g. if the key points to a deleted document
   * or if one of the values points at a deleted document
   */
private boolean isModified(Bits liveDocs, Object cacheKey, Object cacheValue) {
/*
if (!liveDocs.get((Integer) get((K)cacheKey))) { // doc is deleted
return true;
}
for (Integer luceneId: (Integer[]) cacheValue) {
if (!liveDocs.get(luceneId) || luceneId == -1) { // some of the linked docs was deleted or unrecognized
return true;
}
}
*/
    // NOTE: the check above is commented out; every entry is currently treated as unmodified
    return false;
}
public void close() {
}
/*
* Reads values from the DocValue and/or FieldCache and calls the
* setter
*/
private class Transformer {
public void process(int docBase, int docid) {
throw new NotImplementedException();
}
}
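  /*
   * Receives a single value harvested for document (docbase + docid)
   * and stores it into the cache structures
   */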
private class KVSetter {
@SuppressWarnings({ "unchecked" })
public void set (int docbase, int docid, Object value) {
throw new NotImplementedException();
}
}
/*
* Given the set of fields, we'll look inside them and retrieve (into memory)
* all values
*/
private void unInvertedTheDamnThing(
SolrIndexSearcher searcher,
List<String> fields,
KVSetter setter) throws IOException {
IndexSchema schema = searcher.getCore().getLatestSchema();
List<LeafReaderContext> leaves = searcher.getIndexReader().getContext().leaves();
Bits liveDocs;
LeafReader lr;
Transformer transformer;
    for (LeafReaderContext leaf: leaves) {
      int docBase = leaf.docBase;
      liveDocs = leaf.reader().getLiveDocs();
      lr = leaf.reader();
      FieldInfos fInfo = lr.getFieldInfos();
for (String field: fields) {
FieldInfo fi = fInfo.fieldInfo(field);
        if (fi == null) {
          log.error("Field " + field + " is not present in this index segment; skipping it!");
          continue;
        }
SchemaField fSchema = schema.getField(field);
DocValuesType fType = fi.getDocValuesType();
Map<String,Type> mapping = new HashMap<String,Type>();
final LeafReader unReader;
        if (fType.equals(DocValuesType.NONE)) {
          // no docvalues in the index; decide how to uninvert based on the schema field type
          Class<? extends FieldType> c = fSchema.getType().getClass();
          if (TextField.class.isAssignableFrom(c) || StrField.class.isAssignableFrom(c)) {
            if (fSchema.multiValued()) {
              mapping.put(field, Type.SORTED_SET_BINARY);
            }
            else {
              mapping.put(field, Type.SORTED);
            }
          }
          else if (TrieIntField.class.isAssignableFrom(c)) {
            if (fSchema.multiValued()) {
              mapping.put(field, Type.SORTED_SET_INTEGER);
            }
            else {
              mapping.put(field, Type.LEGACY_INTEGER); // Trie fields index legacy numerics, not points
            }
          }
          else {
            continue;
          }
          unReader = new UninvertingReader(lr, mapping);
          // the wrapped reader reports the uninverted docvalues type for the field
          fType = unReader.getFieldInfos().fieldInfo(field).getDocValuesType();
        }
        else {
          unReader = lr;
        }
switch(fType) {
case NUMERIC:
transformer = new Transformer() {
NumericDocValues dv = unReader.getNumericDocValues(field);
@Override
public void process(int docBase, int docId) {
int v = (int) dv.get(docId);
setter.set(docBase, docId, v);
}
};
break;
case SORTED_NUMERIC:
transformer = new Transformer() {
SortedNumericDocValues dv = unReader.getSortedNumericDocValues(field);
@Override
public void process(int docBase, int docId) {
dv.setDocument(docId);
int max = dv.count();
int v;
for (int i=0; i<max; i++) {
v = (int) dv.valueAt(i);
setter.set(docBase, docId, v);
}
}
};
break;
case SORTED_SET:
          transformer = new Transformer() {
            SortedSetDocValues dv = unReader.getSortedSetDocValues(field);
            @Override
            public void process(int docBase, int docId) {
              dv.setDocument(docId);
              for (long ord = dv.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = dv.nextOrd()) {
                final BytesRef value = dv.lookupOrd(ord);
                setter.set(docBase, docId, value.utf8ToString());
              }
            }
          };
break;
case SORTED:
          transformer = new Transformer() {
            SortedDocValues dv = unReader.getSortedDocValues(field);
            @Override
            public void process(int docBase, int docId) {
              BytesRef v = dv.get(docId);
              if (v.length == 0)
                return;
              setter.set(docBase, docId, v.utf8ToString());
            }
          };
break;
default:
throw new IllegalArgumentException("The field " + field + " is of type that cannot be un-inverted");
}
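        // walk every live document in this segment and feed its values to the setter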
int i = 0;
while(i < lr.maxDoc()) {
if (liveDocs != null && !(i < liveDocs.length() && liveDocs.get(i))) {
i++;
continue;
}
transformer.process(docBase, i);
i++;
}
}
}
}
//////////////////////// SolrInfoMBeans methods //////////////////////
public String getName() {
return CitationLRUCache.class.getName();
}
public String getDescription() {
return description;
}
public String getSource() {
return "$URL: http://svn.apache.org/repos/asf/lucene/dev/branches/lucene_solr_4_0/solr/core/src/java/org/apache/solr/search/LRUCache.java $";
}
@SuppressWarnings({ "rawtypes", "unchecked" })
public NamedList getStatistics() {
NamedList lst = new SimpleOrderedMap();
synchronized (relationships) {
lst.add("lookups", lookups);
lst.add("hits", hits);
lst.add("hitratio", calcHitRatio(lookups,hits));
lst.add("inserts", inserts);
lst.add("evictions", evictions);
lst.add("size", relationships.size());
}
lst.add("warmupTime", warmupTime);
long clookups = stats.lookups.get();
long chits = stats.hits.get();
lst.add("cumulative_lookups", clookups);
lst.add("cumulative_hits", chits);
lst.add("cumulative_hitratio", calcHitRatio(clookups,chits));
lst.add("cumulative_inserts", stats.inserts.get());
lst.add("cumulative_evictions", stats.evictions.get());
return lst;
}
@Override
public String toString() {
return name() + getStatistics().toString();
}
@Override
public int hashCode() {
    return Arrays.hashCode(referenceFields) ^ Arrays.hashCode(identifierFields) ^ sourceReaderHashCode;
}
public String identifierString() {
    StringBuilder out = new StringBuilder();
out.append("CitationLRUCache(");
out.append("idfields:");
out.append(Arrays.toString(identifierFields));
if (referenceFields.length > 0) {
out.append(", valfields:");
out.append(Arrays.toString(referenceFields));
}
out.append(")");
return out.toString();
}
public static class SimpleRegenerator implements CacheRegenerator {
@SuppressWarnings({ "unchecked", "rawtypes" })
public boolean regenerateItem(SolrIndexSearcher newSearcher,
SolrCache newCache,
SolrCache oldCache,
Object oldKey,
Object oldVal)
throws IOException {
newCache.put(oldKey,oldVal);
return true;
}
  }
/**
* Efficient resizable auto-expanding list holding <code>int</code> elements;
* implemented with arrays.
*/
private static final class ArrayIntList {
private int[] elements;
private int size = 0;
public ArrayIntList(int initialCapacity) {
elements = new int[initialCapacity];
}
public void add(int elem) {
if (size == elements.length) ensureCapacity(size + 1);
elements[size++] = elem;
}
public int[] getElements() {
int[] out = new int[size];
System.arraycopy(elements, 0, out, 0, size);
return out;
}
public int get(int index) {
if (index >= size) throwIndex(index);
return elements[index];
}
public int size() {
return size;
}
private void ensureCapacity(int minCapacity) {
int newCapacity = Math.max(minCapacity, (elements.length * 3) / 2 + 1);
int[] newElements = new int[newCapacity];
System.arraycopy(elements, 0, newElements, 0, size);
elements = newElements;
}
private void throwIndex(int index) {
throw new IndexOutOfBoundsException("index: " + index
+ ", size: " + size);
}
public String toString() {
return Arrays.toString(elements);
}
/** returns the first few positions (without offsets); debug only */
@SuppressWarnings("unused")
public String toString(int stride) {
int s = size() / stride;
int len = Math.min(10, s); // avoid printing huge lists
StringBuilder buf = new StringBuilder(4*len);
buf.append("[");
for (int i = 0; i < len; i++) {
buf.append(get(i*stride));
if (i < len-1) buf.append(", ");
}
if (len != s) buf.append(", ..."); // and some more...
buf.append("]");
return buf.toString();
}
}
  /*
   * The main data structure holding information about the lucene documents.
   *
   * For speed, the data gets loaded into RAM; we keep these pieces:
   *
   *  - mapping: key -> lucene docid
   *  - references: docid -> many other docids
   *  - citations: docid -> many other docids
   *
   * Until dynamic loading of data is implemented, this cache
   * will always grow to the maxdoc size, so that no
   * evictions happen.
   */
@SuppressWarnings("hiding")
public class RelationshipLinkedHashMap<K,V> extends LinkedHashMap<K,V> {
private static final long serialVersionUID = -356203002886265188L;
int slimit;
List<ArrayIntList> references;
List<ArrayIntList> citations;
public RelationshipLinkedHashMap (int initialSize, float ratio, boolean accessOrder,
int limit, Float sizeInPercent) {
super(initialSize, ratio, accessOrder);
slimit = limit;
      references = new ArrayList<ArrayIntList>(0); // empty just to prevent NPE; normally these are
      citations = new ArrayList<ArrayIntList>(0);  // (re)initialized in initializeCitationCache
}
@SuppressWarnings("rawtypes")
@Override
protected boolean removeEldestEntry(Map.Entry eldest) {
return false;
/*
if (size() > slimit) {
// increment evictions regardless of state.
// this doesn't need to be synchronized because it will
// only be called in the context of a higher level synchronized block.
evictions++;
stats.evictions.incrementAndGet();
return true;
}
return false;
*/
}
    public int[] getReferences(int docid) {
      if (docid < references.size()) {
        ArrayIntList c = references.get(docid);
        if (c != null)
          return c.getElements();
      }
      return null;
    }
public Iterator<int[][]> getRelationshipsIterator() {
return new CitationDataIterator();
}
public int relationshipsDataSize() {
return citations.size();
}
    public int[] getCitations(int docid) {
      if (docid < citations.size()) {
        ArrayIntList c = citations.get(docid);
        if (c != null)
          return c.getElements();
      }
      return null;
    }
public void initializeCitationCache(int maxDocSize) {
references = new ArrayList<ArrayIntList>(maxDocSize);
citations = new ArrayList<ArrayIntList>(maxDocSize);
      // I was hoping this is not necessary, but set(index, value)
      // throws errors otherwise
for (int i=0;i<maxDocSize;i++) {
references.add(null);
citations.add(null);
}
}
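    /*
     * Records that sourceDocid references the document whose identifier
     * equals value; identifiers that cannot be resolved are ignored
     */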
public void addReference(int sourceDocid, Object value) {
//System.out.println("addReference(" + sourceDocid + ", " + value + ")");
if (this.containsKey(value)) {
addReference(sourceDocid, (Integer) this.get(value));
}
else {
//addReference(sourceDocid, -1);
}
}
public void addReference(int sourceDocid, Integer targetDocid) {
_add(references, sourceDocid, targetDocid);
}
public void addCitation(int sourceDocid, Object value) {
//System.out.println("addCitation(" + sourceDocid + ", " + value + ")");
if (this.containsKey(value)) {
addCitation(sourceDocid, (Integer) this.get(value));
}
else {
//addCitation(sourceDocid, -1);
}
}
public void addCitation(int sourceDocid, Integer targetDocid) {
//System.out.println("addCitation(" + sourceDocid + "," + targetDocid+")");
_add(citations, sourceDocid, targetDocid);
}
private void _add(List<ArrayIntList> target, int sourceDocid, int targetDocid) {
//System.out.println("_add(" + sourceDocid + "," + targetDocid+")");
if (target.get(sourceDocid) == null) {
ArrayIntList pointer = new ArrayIntList(1);
pointer.add(targetDocid);
target.set(sourceDocid, pointer);
}
else {
target.get(sourceDocid).add(targetDocid);
}
}
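    /*
     * Builds the missing half of the graph: if doc X references doc Y,
     * then Y is cited by X (and, in the method below, vice versa)
     */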
public void inferCitationsFromReferences() {
int i = -1;
for (ArrayIntList refs : references) {
i += 1;
if (refs == null) {
continue;
}
for (int j=0; j<refs.size();j++) {
if (refs.get(j) == -1)
continue;
addCitation(refs.get(j),i);
}
}
}
public void inferReferencesFromCitations() {
int i = -1;
for (ArrayIntList refs : citations) {
i += 1;
if (refs == null) {
continue;
}
for (int j=0; j<refs.size();j++) {
if (refs.get(j) == -1)
continue;
addReference(refs.get(j),i);
}
}
}
private class CitationDataIterator implements Iterator<int[][]> {
int cursor = 0; // index of next element to return
public boolean hasNext() {
return cursor != citations.size();
}
public int[][] next() {
int i = cursor;
if (i >= citations.size())
throw new NoSuchElementException();
int[][] out = new int[2][];
ArrayIntList v1 = references.get(cursor);
ArrayIntList v2 = citations.get(cursor);
out[0] = v1 != null ? v1.getElements() : new int[0];
out[1] = v2 != null ? v2.getElements() : new int[0];
cursor = i + 1;
return out;
}
public void remove() {
throw new UnsupportedOperationException();
}
}
  }
}