/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.solr.search; import java.io.IOException; import java.lang.invoke.MethodHandles; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.NoSuchElementException; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; import org.apache.commons.lang.NotImplementedException; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.FixedBitSet; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.schema.FieldType; import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.SchemaField; import org.apache.solr.schema.StrField; import org.apache.solr.schema.TextField; import org.apache.solr.schema.TrieIntField; import org.apache.solr.uninverting.UninvertingReader; import org.apache.solr.uninverting.UninvertingReader.Type; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Implementation of a cache for second order operations. This cache * will first construct a mapping from identifiers to lucene ids. * Next, it will read all values from a document field and build * in-memory data structure that can be used to tell what documents * are related. * * For the time being, we read whole index into memory to create * a citation network, but this implementation should also be * capable of holding only partial (the most accessed) citation * network in memory. However, the initial mapping (value<->lucene id) * will always be constructed in its entirety. * */ public class CitationLRUCache<K,V> extends SolrCacheBase implements SolrCache<K,V> { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); /* An instance of this class will be shared across multiple instances * of an LRUCache at the same time. Make sure everything is thread safe. */ private static class CumulativeStats { AtomicLong lookups = new AtomicLong(); AtomicLong hits = new AtomicLong(); AtomicLong inserts = new AtomicLong(); AtomicLong evictions = new AtomicLong(); } private CumulativeStats stats; // per instance stats. The synchronization used for the map will also be // used for updating these statistics (and hence they are not AtomicLongs private long lookups; private long hits; private long inserts; private long evictions; private long warmupTime = 0; private String description="Citation LRU Cache"; // the main objects private LinkedHashMap<K,V> relationships; private String[] referenceFields; private String[] citationFields; private String[] identifierFields = null; private int sourceReaderHashCode = 0; // If we detect that you are mixing int and text fields // we'll treat all values (mappings) as text values private boolean treatIdentifiersAsText = false; // TODO: i'm planning to add the ability to build the cache // incrementally (ie per index segment), but it may // not be necessary as we are going to denormalize // citation data outside solr and prepare everything there... private boolean incremental = false; private boolean reuseCache; @SuppressWarnings({ "unchecked", "rawtypes" }) public Object init(Map args, Object persistence, CacheRegenerator regenerator) { super.init(args, regenerator); identifierFields = ((String)args.get("identifierFields")).split(","); assert (identifierFields != null && identifierFields.length > 0); incremental = "true".equals(((String)args.get("incremental"))); reuseCache = "true".equals(((String)args.get("reuseCache"))); citationFields = new String[0]; referenceFields = new String[0]; if (args.containsKey("referenceFields") && ((String)args.get("referenceFields")).trim().length() > 0) { referenceFields = ((String)args.get("referenceFields")).split(","); } if (args.containsKey("citationFields") && ((String)args.get("citationFields")).trim().length() > 0) { citationFields = ((String)args.get("citationFields")).split(","); } Float sizeInPercent = null; String str = (String)args.get("size"); if (str != null && str.endsWith("%")) { str = str.substring(0, str.length()-1); sizeInPercent = Integer.parseInt(str) / 100f; } final int limit = str==null ? 1024 : Integer.parseInt(str); str = (String)args.get("initialSize"); final int initialSize = Math.min(str==null ? 1024 : Integer.parseInt(str), limit); description = generateDescription(limit, initialSize); relationships = new RelationshipLinkedHashMap<K,V>(initialSize, 0.75f, true, limit, sizeInPercent); if (persistence==null) { // must be the first time a cache of this type is being created persistence = new CumulativeStats(); } stats = (CumulativeStats)persistence; return persistence; } /** * * @return Returns the description of this cache. */ private String generateDescription(int limit, int initialSize) { String description = "CitationLRU Cache(maxSize=" + limit + ", initialSize=" + initialSize; if (isAutowarmingOn()) { description += ", " + getAutowarmDescription(); } description += ')'; return description; } public int size() { synchronized(relationships) { return relationships.size(); } } public boolean treatsIdentifiersAsText() { return treatIdentifiersAsText; } public V put(K key, V value) { //System.out.println("put(" + key + "," + value+")"); synchronized (relationships) { if (getState() == State.LIVE) { stats.inserts.incrementAndGet(); } // increment local inserts regardless of state??? // it does make it more consistent with the current size... inserts++; return relationships.put(key,value); } } public V get(K key) { synchronized (relationships) { V val = relationships.get(key); if (getState() == State.LIVE) { // only increment lookups and hits if we are live. lookups++; stats.lookups.incrementAndGet(); if (val!=null) { hits++; stats.hits.incrementAndGet(); } } return val; } } /* * This method should be used only for very specific purposes of * dumping the citation cache (or accessing all elements of * the cache). Access to the map is not synchronized, but you * are iterating over a copy of data - so yo cannot change it * * The first comes references, the second are citations */ public Iterator<int[][]> getCitationsIterator() { return ((RelationshipLinkedHashMap<K,V>) relationships).getRelationshipsIterator(); } public int getCitationsIteratorSize() { synchronized (relationships) { return ((RelationshipLinkedHashMap<K,V>) relationships).relationshipsDataSize(); } } public int[] getCitations(K key) { synchronized (relationships) { V val = relationships.get(key); if (val==null) return null; RelationshipLinkedHashMap<K,V> relMap = (RelationshipLinkedHashMap<K,V>) relationships; int[] values = relMap.getCitations((Integer)val); if (getState() == State.LIVE) { // only increment lookups and hits if we are live. lookups++; stats.lookups.incrementAndGet(); if (values!=null) { hits++; stats.hits.incrementAndGet(); } } return values; } } /* * This is a helper method allowing you to retrieve * what we have directly using lucene docid */ public int[] getCitations(int docid) { synchronized (relationships) { RelationshipLinkedHashMap<K,V> relMap = (RelationshipLinkedHashMap<K,V>) relationships; int[] val = relMap.getCitations(docid); if (getState() == State.LIVE) { // only increment lookups and hits if we are live. lookups++; stats.lookups.incrementAndGet(); if (val!=null) { hits++; stats.hits.incrementAndGet(); } } return val; } } public int[] getReferences(K key) { synchronized (relationships) { V val = relationships.get(key); if (val==null) return null; RelationshipLinkedHashMap<K,V> relMap = (RelationshipLinkedHashMap<K,V>) relationships; int[] values = relMap.getReferences((Integer)val); if (getState() == State.LIVE) { // only increment lookups and hits if we are live. lookups++; stats.lookups.incrementAndGet(); if (values!=null) { hits++; stats.hits.incrementAndGet(); } } return values; } } /* * This is a helper method allowing you to retrieve * what we have directly using lucene docid */ public int[] getReferences(int docid) { synchronized (relationships) { RelationshipLinkedHashMap<K,V> relMap = (RelationshipLinkedHashMap<K,V>) relationships; int[] val = relMap.getReferences(docid); if (getState() == State.LIVE) { // only increment lookups and hits if we are live. lookups++; stats.lookups.incrementAndGet(); if (val!=null) { hits++; stats.hits.incrementAndGet(); } } return val; } } public void clear() { synchronized(relationships) { relationships.clear(); } } private boolean isWarming = false; private boolean purgeCache; public boolean isWarmingOrWarmed() { return isWarming; } public void warm(SolrIndexSearcher searcher, SolrCache<K,V> old) { long warmingStartTime = System.nanoTime(); if (isAutowarmingOn()) { isWarming = true; try { log.info("Warming cache (" + name() + "): " + searcher); if (this.incremental ) { warmIncrementally(searcher, old); } else { warmRebuildEverything(searcher, old); } log.info("Warming cache done (# entries:" + relationships.size() + "): " + searcher); } catch (IOException e) { throw new SolrException(ErrorCode.SERVER_ERROR, "Failed to generate initial IDMapping", e); } sourceReaderHashCode = searcher.hashCode(); } warmupTime = TimeUnit.MILLISECONDS.convert(System.nanoTime() - warmingStartTime, TimeUnit.NANOSECONDS); } private void warmRebuildEverything(SolrIndexSearcher searcher, SolrCache<K,V> old) throws IOException { List<String> fields = getFields(searcher, this.identifierFields); //if (this.referenceFields.length == 0 && this.citationFields.length == 0) { // return; //} // builds the mapping from document ID's to lucene docids unInvertedTheDamnThing(searcher, fields, new KVSetter() { @Override @SuppressWarnings({ "unchecked" }) public void set (int docbase, int docid, Object value) { if (treatIdentifiersAsText && value instanceof Integer) { value = Integer.toString((Integer) value); } put((K) value, (V) (Integer) (docbase+docid)); } } ); if (this.referenceFields.length > 0 || this.citationFields.length > 0) { @SuppressWarnings("rawtypes") final RelationshipLinkedHashMap relMap = (RelationshipLinkedHashMap) relationships; relMap.initializeCitationCache(searcher.maxDoc()); // TODO: touch only updated fields unInvertedTheDamnThing(searcher, getFields(searcher, this.referenceFields), new KVSetter() { @Override public void set (int docbase, int docid, Object value) { synchronized (relMap) { relMap.addReference(docbase+docid, value); } } } ); unInvertedTheDamnThing(searcher, getFields(searcher, this.citationFields), new KVSetter() { @Override public void set (int docbase, int docid, Object value) { synchronized (relMap) { relMap.addCitation(docbase+docid, value); } } } ); if (this.citationFields.length == 0 && this.referenceFields.length > 0) { relMap.inferCitationsFromReferences(); } else if (this.citationFields.length > 0 && this.referenceFields.length == 0) { relMap.inferReferencesFromCitations(); } } } private void warmIncrementally(SolrIndexSearcher searcher, SolrCache<K,V> old) throws IOException { if (regenerator==null) return; List<String> fields = getFields(searcher, this.identifierFields); CitationLRUCache<K,V> other = (CitationLRUCache<K,V>)old; // collect ids of documents that need to be reloaded/regenerated during this // warmup run //System.out.println("searcher: " + searcher.toString()); //System.out.println("maxDoc: " + searcher.getIndexReader().maxDoc()); FixedBitSet toRefresh = new FixedBitSet(searcher.getIndexReader().maxDoc()); //System.out.println("version=" + searcher.getIndexReader().getVersion()); //try { //System.out.println("commit=" + searcher.getIndexReader().getIndexCommit()); //} catch (IOException e2) { // TODO Auto-generated catch block //e2.printStackTrace(); //} // for (IndexReaderContext c : searcher.getTopReaderContext().children()) { // //System.out.println("context=" + c.reader().getCombinedCoreAndDeletesKey()); // } // for (IndexReaderContext l : searcher.getIndexReader().leaves()) { // //System.out.println(l); // } Bits liveDocs = searcher.getSlowAtomicReader().getLiveDocs(); //System.out.println(liveDocs == null ? "liveDocs=" + null : "liveDocs=" + liveDocs.length()); //System.out.println("numDeletes=" + searcher.getAtomicReader().numDeletedDocs()); if (liveDocs == null) { // everything is new, this could be fresh index or merged/optimized index too //searcher.getAtomicReader().getContext().children().size() //other.map.clear(); // force regeneration toRefresh.set(0, toRefresh.length()); // Build the mapping from indexed values into lucene ids // this must always be available, so we build it no matter what... // XXX: make it update only the necessary IDs (not the whole index) unInvertedTheDamnThing(searcher, fields, new KVSetter() { @SuppressWarnings("unchecked") @Override public void set (int docbase, int docid, Object value) { put((K) value, (V) (Integer) (docbase+docid)); } } ); } else if (liveDocs != null) { Integer luceneId; for (V v: other.relationships.values()) { luceneId = ((Integer) v); if (luceneId <= liveDocs.length() && !liveDocs.get(luceneId)) { // doc was either deleted or updated //System.out.println("Found deleted: " + luceneId); // retrieve all citations/references for this luceneId and mark these docs to be refreshed } } for (int i = 0; i < toRefresh.length(); i++) { if (liveDocs.get(i)) { toRefresh.set(i); } } } // warm entries if (isAutowarmingOn()) { Object[] keys,vals = null; // Don't do the autowarming in the synchronized block, just pull out the keys and values. synchronized (other.relationships) { int sz = autowarm.getWarmCount(other.relationships.size()); keys = new Object[sz]; vals = new Object[sz]; Iterator<Map.Entry<K, V>> iter = other.relationships.entrySet().iterator(); // iteration goes from oldest (least recently used) to most recently used, // so we need to skip over the oldest entries. int skip = other.relationships.size() - sz; for (int i=0; i<skip; i++) iter.next(); for (int i=0; i<sz; i++) { Map.Entry<K,V> entry = iter.next(); keys[i]=entry.getKey(); vals[i]=entry.getValue(); } } // autowarm from the oldest to the newest entries so that the ordering will be // correct in the new cache. for (int i=0; i<keys.length; i++) { try { boolean continueRegen = true; if (isModified(liveDocs, keys[i], vals[i])) { toRefresh.set((Integer) keys[i]); } else { continueRegen = regenerator.regenerateItem(searcher, this, old, keys[i], vals[i]); } if (!continueRegen) break; } catch (Throwable e) { SolrException.log(log,"Error during auto-warming of key:" + keys[i], e); } } } } private List<String> getFields(SolrIndexSearcher searcher, String[] listOfFields) { List<String> out = new ArrayList<String>(); IndexSchema schema = searcher.getCore().getLatestSchema(); if (schema.getUniqueKeyField() == null) { throw new SolrException(ErrorCode.FORBIDDEN, "Sorry, your schema is missing unique key and thus you probably have many duplicates. I won't continue"); } for (String f: listOfFields) { String fName = f.replace(":sorted", ""); SchemaField fieldInfo = schema.getField(fName); FieldType type = fieldInfo.getType(); if (type.getNumericType() != null) { synchronized (relationships) { treatIdentifiersAsText = true; } } if (!fieldInfo.stored() && type.getDocValuesFormat().equals(DocValuesType.NONE)) { throw new SolrException(ErrorCode.FORBIDDEN, "The field " + f + " cannot be used to build citation cache!"); } out.add(fName); } return out; } /* * Checks whether the cache needs to be rebuilt for this * document, eg. if the key points to a deleted document * or if one of the values point at a deleted document */ private boolean isModified(Bits liveDocs, Object cacheKey, Object cacheValue) { /* if (!liveDocs.get((Integer) get((K)cacheKey))) { // doc is deleted return true; } for (Integer luceneId: (Integer[]) cacheValue) { if (!liveDocs.get(luceneId) || luceneId == -1) { // some of the linked docs was deleted or unrecognized return true; } } */ return false; } public void close() { } /* * Reads values from the DocValue and/or FieldCache and calls the * setter */ private class Transformer { public void process(int docBase, int docid) { throw new NotImplementedException(); } } private class KVSetter { @SuppressWarnings({ "unchecked" }) public void set (int docbase, int docid, Object value) { throw new NotImplementedException(); } } /* * Given the set of fields, we'll look inside them and retrieve (into memory) * all values */ private void unInvertedTheDamnThing( SolrIndexSearcher searcher, List<String> fields, KVSetter setter) throws IOException { IndexSchema schema = searcher.getCore().getLatestSchema(); List<LeafReaderContext> leaves = searcher.getIndexReader().getContext().leaves(); Bits liveDocs; LeafReader lr; Transformer transformer; for (LeafReaderContext leave: leaves) { int docBase = leave.docBase; liveDocs = leave.reader().getLiveDocs(); lr = leave.reader(); FieldInfos fInfo = lr.getFieldInfos(); for (String field: fields) { FieldInfo fi = fInfo.fieldInfo(field); if (fi == null) { log.error("Field " + field + " has no schema entry; skipping it!"); continue; } SchemaField fSchema = schema.getField(field); DocValuesType fType = fi.getDocValuesType(); Map<String,Type> mapping = new HashMap<String,Type>(); final LeafReader unReader; if (fType.equals(DocValuesType.NONE)) { Class<? extends DocValuesType> c = fType.getClass(); if (c.isAssignableFrom(TextField.class) || c.isAssignableFrom(StrField.class)) { if (fSchema.multiValued()) { mapping.put(field, Type.SORTED); } else { mapping.put(field, Type.BINARY); } } else if (c.isAssignableFrom(TrieIntField.class)) { if (fSchema.multiValued()) { mapping.put(field, Type.SORTED_SET_INTEGER); } else { mapping.put(field, Type.INTEGER_POINT); } } else { continue; } unReader = new UninvertingReader(lr, mapping); } else { unReader = lr; } switch(fType) { case NUMERIC: transformer = new Transformer() { NumericDocValues dv = unReader.getNumericDocValues(field); @Override public void process(int docBase, int docId) { int v = (int) dv.get(docId); setter.set(docBase, docId, v); } }; break; case SORTED_NUMERIC: transformer = new Transformer() { SortedNumericDocValues dv = unReader.getSortedNumericDocValues(field); @Override public void process(int docBase, int docId) { dv.setDocument(docId); int max = dv.count(); int v; for (int i=0; i<max; i++) { v = (int) dv.valueAt(i); setter.set(docBase, docId, v); } } }; break; case SORTED_SET: transformer = new Transformer() { SortedSetDocValues dv = unReader.getSortedSetDocValues(field); int errs = 0; @Override public void process(int docBase, int docId) { if (errs > 5) return; dv.setDocument(docId); for (long ord = dv.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = dv.nextOrd()) { final BytesRef value = dv.lookupOrd(ord); setter.set(docBase, docId, value.utf8ToString()); } } }; break; case SORTED: transformer = new Transformer() { SortedDocValues dv = unReader.getSortedDocValues(field); TermsEnum te; @Override public void process(int docBase, int docId) { BytesRef v = dv.get(docId); if (v.length == 0) return; setter.set(docBase, docId, v.utf8ToString()); } }; break; default: throw new IllegalArgumentException("The field " + field + " is of type that cannot be un-inverted"); } int i = 0; while(i < lr.maxDoc()) { if (liveDocs != null && !(i < liveDocs.length() && liveDocs.get(i))) { i++; continue; } transformer.process(docBase, i); i++; } } } } //////////////////////// SolrInfoMBeans methods ////////////////////// public String getName() { return CitationLRUCache.class.getName(); } public String getDescription() { return description; } public String getSource() { return "$URL: http://svn.apache.org/repos/asf/lucene/dev/branches/lucene_solr_4_0/solr/core/src/java/org/apache/solr/search/LRUCache.java $"; } @SuppressWarnings({ "rawtypes", "unchecked" }) public NamedList getStatistics() { NamedList lst = new SimpleOrderedMap(); synchronized (relationships) { lst.add("lookups", lookups); lst.add("hits", hits); lst.add("hitratio", calcHitRatio(lookups,hits)); lst.add("inserts", inserts); lst.add("evictions", evictions); lst.add("size", relationships.size()); } lst.add("warmupTime", warmupTime); long clookups = stats.lookups.get(); long chits = stats.hits.get(); lst.add("cumulative_lookups", clookups); lst.add("cumulative_hits", chits); lst.add("cumulative_hitratio", calcHitRatio(clookups,chits)); lst.add("cumulative_inserts", stats.inserts.get()); lst.add("cumulative_evictions", stats.evictions.get()); return lst; } @Override public String toString() { return name() + getStatistics().toString(); } @Override public int hashCode() { return referenceFields.hashCode() ^ identifierFields.hashCode() ^ sourceReaderHashCode; } public String identifierString() { StringBuffer out = new StringBuffer(); out.append("CitationLRUCache("); out.append("idfields:"); out.append(Arrays.toString(identifierFields)); if (referenceFields.length > 0) { out.append(", valfields:"); out.append(Arrays.toString(referenceFields)); } out.append(")"); return out.toString(); } public static class SimpleRegenerator implements CacheRegenerator { @SuppressWarnings({ "unchecked", "rawtypes" }) public boolean regenerateItem(SolrIndexSearcher newSearcher, SolrCache newCache, SolrCache oldCache, Object oldKey, Object oldVal) throws IOException { newCache.put(oldKey,oldVal); return true; } }; /** * Efficient resizable auto-expanding list holding <code>int</code> elements; * implemented with arrays. */ private static final class ArrayIntList { private int[] elements; private int size = 0; public ArrayIntList(int initialCapacity) { elements = new int[initialCapacity]; } public void add(int elem) { if (size == elements.length) ensureCapacity(size + 1); elements[size++] = elem; } public int[] getElements() { int[] out = new int[size]; System.arraycopy(elements, 0, out, 0, size); return out; } public int get(int index) { if (index >= size) throwIndex(index); return elements[index]; } public int size() { return size; } private void ensureCapacity(int minCapacity) { int newCapacity = Math.max(minCapacity, (elements.length * 3) / 2 + 1); int[] newElements = new int[newCapacity]; System.arraycopy(elements, 0, newElements, 0, size); elements = newElements; } private void throwIndex(int index) { throw new IndexOutOfBoundsException("index: " + index + ", size: " + size); } public String toString() { return Arrays.toString(elements); } /** returns the first few positions (without offsets); debug only */ @SuppressWarnings("unused") public String toString(int stride) { int s = size() / stride; int len = Math.min(10, s); // avoid printing huge lists StringBuilder buf = new StringBuilder(4*len); buf.append("["); for (int i = 0; i < len; i++) { buf.append(get(i*stride)); if (i < len-1) buf.append(", "); } if (len != s) buf.append(", ..."); // and some more... buf.append("]"); return buf.toString(); } } /* * The main datastructure holding information about the lucene documents. * * For speed purposes, the data gets loaded into RAM; we have those pieces * * - mapping: key -> lucene docid * - references: docid -> many other docids * - citations: docid -> many other docids * * Until I implement dynamic loading of data, this cache * will always grow to the maxdoc size, so that no * evictions happen */ @SuppressWarnings("hiding") public class RelationshipLinkedHashMap<K,V> extends LinkedHashMap<K,V> { private static final long serialVersionUID = -356203002886265188L; int slimit; List<ArrayIntList> references; List<ArrayIntList> citations; public RelationshipLinkedHashMap (int initialSize, float ratio, boolean accessOrder, int limit, Float sizeInPercent) { super(initialSize, ratio, accessOrder); slimit = limit; references = new ArrayList<ArrayIntList>(0); // just to prevent NPE - normally, is citations = new ArrayList<ArrayIntList>(0); // initialized in initializeCitationCache } @SuppressWarnings("rawtypes") @Override protected boolean removeEldestEntry(Map.Entry eldest) { return false; /* if (size() > slimit) { // increment evictions regardless of state. // this doesn't need to be synchronized because it will // only be called in the context of a higher level synchronized block. evictions++; stats.evictions.incrementAndGet(); return true; } return false; */ } public int[] getReferences(int docid) { if (docid < references.size() && references.get(docid) != null) { ArrayIntList c = references.get(docid); if (c != null) return c.getElements(); } return null; } public Iterator<int[][]> getRelationshipsIterator() { return new CitationDataIterator(); } public int relationshipsDataSize() { return citations.size(); } public int[] getCitations(int docid) { if (docid < citations.size() && citations.get(docid) != null) { ArrayIntList c = citations.get(docid); if (c != null) return c.getElements(); } return null; } public void initializeCitationCache(int maxDocSize) { references = new ArrayList<ArrayIntList>(maxDocSize); citations = new ArrayList<ArrayIntList>(maxDocSize); // i was hoping thi sis not necessary, but set(index, value) // throws errors otherwise for (int i=0;i<maxDocSize;i++) { references.add(null); citations.add(null); } } public void addReference(int sourceDocid, Object value) { //System.out.println("addReference(" + sourceDocid + ", " + value + ")"); if (this.containsKey(value)) { addReference(sourceDocid, (Integer) this.get(value)); } else { //addReference(sourceDocid, -1); } } public void addReference(int sourceDocid, Integer targetDocid) { _add(references, sourceDocid, targetDocid); } public void addCitation(int sourceDocid, Object value) { //System.out.println("addCitation(" + sourceDocid + ", " + value + ")"); if (this.containsKey(value)) { addCitation(sourceDocid, (Integer) this.get(value)); } else { //addCitation(sourceDocid, -1); } } public void addCitation(int sourceDocid, Integer targetDocid) { //System.out.println("addCitation(" + sourceDocid + "," + targetDocid+")"); _add(citations, sourceDocid, targetDocid); } private void _add(List<ArrayIntList> target, int sourceDocid, int targetDocid) { //System.out.println("_add(" + sourceDocid + "," + targetDocid+")"); if (target.get(sourceDocid) == null) { ArrayIntList pointer = new ArrayIntList(1); pointer.add(targetDocid); target.set(sourceDocid, pointer); } else { target.get(sourceDocid).add(targetDocid); } } public void inferCitationsFromReferences() { int i = -1; for (ArrayIntList refs : references) { i += 1; if (refs == null) { continue; } for (int j=0; j<refs.size();j++) { if (refs.get(j) == -1) continue; addCitation(refs.get(j),i); } } } public void inferReferencesFromCitations() { int i = -1; for (ArrayIntList refs : citations) { i += 1; if (refs == null) { continue; } for (int j=0; j<refs.size();j++) { if (refs.get(j) == -1) continue; addReference(refs.get(j),i); } } } private class CitationDataIterator implements Iterator<int[][]> { int cursor = 0; // index of next element to return public boolean hasNext() { return cursor != citations.size(); } public int[][] next() { int i = cursor; if (i >= citations.size()) throw new NoSuchElementException(); int[][] out = new int[2][]; ArrayIntList v1 = references.get(cursor); ArrayIntList v2 = citations.get(cursor); out[0] = v1 != null ? v1.getElements() : new int[0]; out[1] = v2 != null ? v2.getElements() : new int[0]; cursor = i + 1; return out; } public void remove() { throw new UnsupportedOperationException(); } } }; }