/** * Copyright (c) 2014, the Temporal Random Indexing AUTHORS. * * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * Neither the name of the University of Bari nor the names of its contributors * may be used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* * GNU GENERAL PUBLIC LICENSE - Version 3, 29 June 2007 * */
package di.uniba.it.tri.vectors;

import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.IOException;
import java.util.UUID;
import java.util.concurrent.ExecutionException;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;

/**
 * Two-level store for vectors addressed by a string key: a bounded in-memory
 * Guava cache in front of an on-disk Lucene index.
 *
 * <p>Every vector handed to {@link #addVector(String, Vector)} is written to
 * the index; {@link #getVector(String)} serves it from memory when cached and
 * otherwise loads it back from the index. The index lives in a directory named
 * {@code ./VC_<random id>} under the current working directory. Each instance
 * uses its own directory, so several caches can coexist in one JVM without
 * competing for the same index write lock.
 *
 * <p>Call {@link #close()} when the cache is no longer needed to release the
 * index writer, reader and directory. The index files are left on disk.
 *
 * @author pierpaolo
 */
public class VectorCache implements Closeable {

    /** Name of the index field holding the lookup key. */
    private static final String KEY_FIELD = "key";

    /** Name of the index field holding the serialized vector. */
    private static final String VECTOR_FIELD = "vector";

    private final LoadingCache<String, Vector> vectorCache;

    private final int cacheSize;

    private final int dimension;

    private final FSDirectory directory;

    private final IndexWriter writer;

    // Both fields are only read or replaced while holding this object's monitor.
    private DirectoryReader dirReader;

    private IndexSearcher searcher;

    /**
     * Creates the cache and opens a fresh backing index.
     *
     * @param dimension number of components read back for each stored vector
     * @param cacheSize maximum number of vectors kept in memory
     * @throws IOException if the index directory, writer or reader cannot be opened
     */
    public VectorCache(int dimension, int cacheSize) throws IOException {
        this.dimension = dimension;
        this.cacheSize = cacheSize;
        this.vectorCache = CacheBuilder.newBuilder()
                .maximumSize(this.cacheSize)
                .build(new CacheLoader<String, Vector>() {
                    @Override
                    public Vector load(String key) throws IOException {
                        return getVectorFromIndex(key);
                    }
                });
        // A per-instance id: a JVM-wide id would make a second instance open the
        // same directory and fail to obtain the index write lock.
        String indexId = UUID.randomUUID().toString().replace("-", "");
        this.directory = FSDirectory.open(new File("./VC_" + indexId));
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LATEST, new KeywordAnalyzer());
        this.writer = new IndexWriter(directory, iwc);
        // Near-real-time reader: sees documents added through the writer without a commit.
        this.dirReader = DirectoryReader.open(writer, true);
        this.searcher = new IndexSearcher(dirReader);
    }

    /**
     * Looks the key up in the index and decodes the stored vector.
     *
     * @param key lookup key
     * @return the stored vector, or {@code null} when the index has no document for the key
     * @throws IOException if the index cannot be read or the stored bytes are too short
     */
    private Vector getVectorFromIndex(String key) throws IOException {
        IndexSearcher current = acquireSearcher();
        try {
            Query q = new TermQuery(new Term(KEY_FIELD, key));
            TopDocs topDocs = current.search(q, 1);
            if (topDocs.scoreDocs.length == 0) {
                return null;
            }
            Document doc = current.doc(topDocs.scoreDocs[0].doc);
            BytesRef raw = doc.getField(VECTOR_FIELD).binaryValue();
            // Honour offset/length: the backing array may be larger than the value.
            return decodeVector(raw.bytes, raw.offset, raw.length);
        } finally {
            // Give back the reference taken in acquireSearcher().
            current.getIndexReader().decRef();
        }
    }

    /**
     * Writes the vector to the index under the given key, replacing any
     * document previously stored for that key so that a later lookup cannot
     * return a stale vector.
     */
    private void storeVector(String key, Vector vector) throws IOException {
        Document docv = new Document();
        docv.add(new StringField(KEY_FIELD, key, Field.Store.NO));
        docv.add(getBinaryField(VECTOR_FIELD, encodeVector(vector)));
        writer.updateDocument(new Term(KEY_FIELD, key), docv);
    }

    /** Builds a stored, untokenized binary field with binary doc values and no term vectors. */
    private Field getBinaryField(String name, byte[] bytes) {
        FieldType binType = new FieldType();
        binType.setDocValueType(FieldInfo.DocValuesType.BINARY);
        binType.setIndexOptions(FieldInfo.IndexOptions.DOCS_ONLY);
        binType.setStoreTermVectorOffsets(false);
        binType.setStoreTermVectorPayloads(false);
        binType.setStoreTermVectorPositions(false);
        binType.setStoreTermVectors(false);
        binType.setStored(true);
        binType.setTokenized(false);
        return new Field(name, bytes, binType);
    }

    /** Serializes the vector with {@code Vector.writeToStream} into a byte array. */
    private byte[] encodeVector(Vector vector) throws IOException {
        // The constructor argument is only an initial capacity; the buffer grows as needed.
        ByteArrayOutputStream byteStream = new ByteArrayOutputStream(vector.getDimension());
        DataOutputStream outstream = new DataOutputStream(byteStream);
        vector.writeToStream(outstream);
        outstream.flush();
        return byteStream.toByteArray();
    }

    /**
     * Returns a searcher over the latest index state and takes a reference on
     * its reader; the caller must release it with {@code decRef()} when done.
     * The reader is reopened if the index changed, and the previous reader's
     * base reference is released so it is closed once in-flight lookups finish.
     */
    private synchronized IndexSearcher acquireSearcher() throws IOException {
        DirectoryReader refreshed = DirectoryReader.openIfChanged(dirReader);
        if (refreshed != null) {
            DirectoryReader stale = dirReader;
            dirReader = refreshed;
            searcher = new IndexSearcher(refreshed);
            stale.close();
        }
        dirReader.incRef();
        return searcher;
    }

    /**
     * Reads {@code dimension} big-endian 32-bit floats from the given slice.
     *
     * @throws IOException if the slice holds fewer than {@code dimension} floats
     */
    private Vector decodeVector(byte[] bytes, int offset, int length) throws IOException {
        DataInputStream instream = new DataInputStream(new ByteArrayInputStream(bytes, offset, length));
        float[] v = new float[dimension];
        for (int i = 0; i < v.length; i++) {
            v[i] = instream.readFloat();
        }
        return new RealVector(v);
    }

    /**
     * Stores the vector under the key, replacing any vector previously stored
     * for it, and drops the key's cached entry so the next read reloads it.
     *
     * @param key lookup key
     * @param vector vector to store
     * @throws IOException if the index cannot be written
     */
    public void addVector(String key, Vector vector) throws IOException {
        storeVector(key, vector);
        vectorCache.invalidate(key);
    }

    /**
     * Returns the vector for the key, from memory if cached, otherwise from the index.
     *
     * <p>When nothing is stored for the key the loader produces {@code null},
     * which the Guava {@code LoadingCache} rejects with an unchecked
     * {@code CacheLoader.InvalidCacheLoadException}.
     *
     * @param key lookup key
     * @return the vector stored for the key
     * @throws IOException declared for callers; index read failures surface wrapped in
     *         {@code ExecutionException}
     * @throws ExecutionException if loading the vector from the index fails
     */
    public Vector getVector(String key) throws IOException, ExecutionException {
        return vectorCache.get(key);
    }

    /**
     * Returns the vector for the key wrapped together with the key itself.
     *
     * @param key lookup key
     * @return a new {@code ObjectVector} built from the key and its vector
     * @throws IOException see {@link #getVector(String)}
     * @throws ExecutionException see {@link #getVector(String)}
     */
    public ObjectVector getObjectVector(String key) throws IOException, ExecutionException {
        Vector vector = getVector(key);
        return new ObjectVector(key, vector);
    }

    /**
     * Returns the maximum number of vectors kept in memory.
     */
    public int getCacheSize() {
        return cacheSize;
    }

    /**
     * Returns the number of components read back for each stored vector.
     */
    public int getDimension() {
        return dimension;
    }

    /**
     * Releases the index reader, the index writer and the directory, in that
     * order, attempting every step even if an earlier one fails. The index
     * files are not deleted.
     *
     * @throws IOException if any of the resources fails to close
     */
    @Override
    public synchronized void close() throws IOException {
        try {
            dirReader.close();
        } finally {
            try {
                writer.close();
            } finally {
                directory.close();
            }
        }
    }
}