/**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.solbase.lucenehbase;
import java.io.IOException;
import java.util.ArrayList;
import java.util.TreeMap;
import java.util.UUID;
import java.util.concurrent.TimeoutException;
import net.rubyeye.xmemcached.exception.MemcachedException;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.index.Term;
import org.apache.solr.schema.IndexSchema;
import org.solbase.cache.CachedObjectWrapper;
import org.solbase.cache.LayeredCache;
import org.solbase.cache.SolbaseLRUCache;
import org.solbase.cache.ThreadLocalCache;
import org.solbase.cache.VersionedCache;
import org.solbase.indexer.ParsedDoc;
/**
 * Static facade over the per-index caches used when reading documents and
 * term/document metadata.
 *
 * <p>One {@code Caches} group is created lazily per index name and kept in a
 * static map. Creation and lookup of a group happen inside the synchronized
 * {@link #getCache(String)} method.
 *
 * <p>NOTE(review): the shard document cache ({@code Caches.shardDocument}) is
 * currently never initialized, so the {@code String}-keyed
 * {@code getDocument}/{@code updateDocument} overloads cannot succeed; they
 * fail with an {@link IllegalStateException} that says so.
 */
public class ReaderCache {

    /** Timeout handed to the document {@link LayeredCache}: 2 hours. */
    public static final long DOCUMENT_CACHE_TIMEOUT = 1000 * 60 * 120;

    /**
     * Timeout handed to the term/doc metadata {@link LayeredCache}: 2 hours.
     *
     * <p>TODO: kept at 2 hours because of a cross-shard version expiry issue:
     * when one shard updates the version identifier of a term, the other
     * shards would not need to reload, but since the version identifier
     * changed they try to load the same data again from HBase.
     */
    public static final long TERM_DOC_METADATA_CACHE_TIMEOUT = 1000 * 60 * 120;

    /** The set of caches that belong to a single index. */
    private static class Caches {
        ThreadLocalCache<Integer, Document, Long> docThreadLocalCache;
        ThreadLocalCache<Term, CompactedTermDocMetadataArray, TermDocMetadataVersionIdentifier> termDocMetadatasLocalCache;
        LayeredCache<Integer, Document, Long, ParsedDoc> luceneDocument;
        // Never assigned at present; see getShardDocumentCache().
        LayeredCache<String, Document, Long, Document> shardDocument;
        LayeredCache<Term, CompactedTermDocMetadataArray, TermDocMetadataVersionIdentifier, TermDocMetadata> termDocMetadatas;
    }

    /** Schema passed to every {@code DocumentLoader} created by this class. */
    public static IndexSchema schema;

    /** Cache groups keyed by index name; only touched inside {@link #getCache(String)}. */
    private static final TreeMap<String, Caches> caches = new TreeMap<String, Caches>();

    /** Random key generated once when this class is initialized. */
    public static final Object fieldCacheKey = UUID.randomUUID();

    /**
     * Looks up a document by its string key in the shard document cache of
     * the given index, using a new {@code ShardDocumentLoader}.
     *
     * @param docIdKey      string key of the document
     * @param fieldSelector selector handed to the loader
     * @param indexName     name of the index whose caches are used
     * @param start         forwarded unchanged to the cache
     * @param end           forwarded unchanged to the cache
     * @return whatever the shard document cache returns for the key
     * @throws IllegalStateException if the shard document cache has not been
     *         configured (which is always the case at present)
     */
    public static CachedObjectWrapper<Document, Long> getDocument(String docIdKey, FieldSelector fieldSelector, String indexName, int start, int end) throws IOException, InterruptedException, MemcachedException, TimeoutException {
        LayeredCache<String, Document, Long, Document> shardCache = getShardDocumentCache(indexName);
        return shardCache.getCachedObject(docIdKey, new ShardDocumentLoader(fieldSelector), indexName, start, end);
    }

    /**
     * Looks up a document by its integer id in the Lucene document cache of
     * the given index, using a new {@code DocumentLoader} built from the
     * field selector and {@link #schema}.
     *
     * @param docId         id of the document
     * @param fieldSelector selector handed to the loader
     * @param indexName     name of the index whose caches are used
     * @param start         forwarded unchanged to the cache
     * @param end           forwarded unchanged to the cache
     * @return whatever the document cache returns for the id
     */
    public static CachedObjectWrapper<Document, Long> getDocument(Integer docId, FieldSelector fieldSelector, String indexName, int start, int end) throws IOException, InterruptedException, MemcachedException, TimeoutException {
        DocumentLoader loader = new DocumentLoader(fieldSelector, schema);
        return getCache(indexName).luceneDocument.getCachedObject(docId, loader, indexName, start, end);
    }

    /**
     * Same as {@link #getDocument(Integer, FieldSelector, String, int, int)}
     * but the {@code DocumentLoader} is additionally given the supplied
     * HBase table handle.
     *
     * @param docId         id of the document
     * @param fieldSelector selector handed to the loader
     * @param indexName     name of the index whose caches are used
     * @param start         forwarded unchanged to the cache
     * @param end           forwarded unchanged to the cache
     * @param htable        table handle handed to the loader
     * @return whatever the document cache returns for the id
     */
    public static CachedObjectWrapper<Document, Long> getDocument(Integer docId, FieldSelector fieldSelector, String indexName, int start, int end, HTableInterface htable) throws IOException, InterruptedException, MemcachedException, TimeoutException {
        DocumentLoader loader = new DocumentLoader(fieldSelector, schema, htable);
        return getCache(indexName).luceneDocument.getCachedObject(docId, loader, indexName, start, end);
    }

    /**
     * Looks up the term/doc metadata for a term in the given index. No loader
     * is passed here; the underlying cache is constructed with a
     * {@code TermDocMetadataLoader}.
     *
     * @param term      term to look up
     * @param indexName name of the index whose caches are used
     * @param start     forwarded unchanged to the cache
     * @param end       forwarded unchanged to the cache
     * @return whatever the term/doc metadata cache returns for the term
     */
    public static CachedObjectWrapper<CompactedTermDocMetadataArray, TermDocMetadataVersionIdentifier> getTermDocsMetadata(Term term, String indexName, int start, int end) throws IOException, InterruptedException, MemcachedException, TimeoutException {
        return getCache(indexName).termDocMetadatas.getCachedObject(term, indexName, start, end);
    }

    /**
     * Forwards a term/doc metadata modification to the term/doc metadata
     * cache of the given index. All arguments are passed through unchanged.
     */
    public static void updateTermDocsMetadata(Term term, TermDocMetadata modificationData, String indexName, IndexWriter writer, LayeredCache.ModificationType modType, boolean updateStore, int startDocId, int endDocId) throws IOException, InterruptedException, MemcachedException, TimeoutException {
        getCache(indexName).termDocMetadatas.updateCachedObject(term, modificationData, indexName, writer, modType, updateStore, startDocId, endDocId);
    }

    /**
     * Forwards a shard document modification to the shard document cache of
     * the given index, using a {@code ShardDocumentLoader} created with a
     * {@code null} field selector.
     *
     * @throws IllegalStateException if the shard document cache has not been
     *         configured (which is always the case at present)
     */
    public static void updateDocument(String docId, Document document, String indexName, IndexWriter writer, LayeredCache.ModificationType modType, boolean updateStore, int startDocId, int endDocId) throws IOException, InterruptedException, MemcachedException, TimeoutException {
        LayeredCache<String, Document, Long, Document> shardCache = getShardDocumentCache(indexName);
        shardCache.updateCachedObject(docId, document, new ShardDocumentLoader(null), indexName, writer, modType, updateStore, startDocId, endDocId);
    }

    /**
     * Forwards a parsed-document modification to the Lucene document cache of
     * the given index, using a {@code DocumentLoader} created with a
     * {@code null} field selector and {@link #schema}.
     */
    public static void updateDocument(Integer docId, ParsedDoc document, String indexName, IndexWriter writer, LayeredCache.ModificationType modType, boolean updateStore, int startDocId, int endDocId) throws IOException, InterruptedException, MemcachedException, TimeoutException {
        DocumentLoader loader = new DocumentLoader(null, ReaderCache.schema);
        getCache(indexName).luceneDocument.updateCachedObject(docId, document, loader, indexName, writer, modType, updateStore, startDocId, endDocId);
    }

    /**
     * Clears both thread-local caches (documents and term/doc metadata) of
     * the given index.
     *
     * @param indexName name of the index whose thread-local caches are cleared
     */
    public static void flushThreadLocalCaches(String indexName) throws IOException {
        // Resolve the group once; getCache() is synchronized, so a second
        // lookup would only take the lock again for the same result.
        Caches cacheGroup = getCache(indexName);
        cacheGroup.docThreadLocalCache.clear();
        cacheGroup.termDocMetadatasLocalCache.clear();
    }

    /**
     * Returns the shard document cache of the given index, failing with a
     * descriptive error when it has not been set up instead of letting the
     * caller hit a bare {@code NullPointerException}.
     */
    private static LayeredCache<String, Document, Long, Document> getShardDocumentCache(String indexName) {
        LayeredCache<String, Document, Long, Document> shardCache = getCache(indexName).shardDocument;
        if (shardCache == null) {
            throw new IllegalStateException("Shard document cache is not configured for index '" + indexName + "'");
        }
        return shardCache;
    }

    /**
     * Returns the cache group of the given index, creating and registering it
     * on first use.
     *
     * <p>Original sizing note: 200k docs and 500k term vectors summed up to
     * 15~17G of total heap space.
     */
    private static synchronized Caches getCache(String indexName) {
        Caches cacheGroup = caches.get(indexName);
        if (cacheGroup == null) {
            cacheGroup = createCaches(indexName);
            caches.put(indexName, cacheGroup);
        }
        return cacheGroup;
    }

    /** Builds a fresh cache group for one index; called only from {@link #getCache(String)}. */
    private static Caches createCaches(String indexName) {
        Caches cacheGroup = new Caches();

        // Document caching: thread-local layer first, then an LRU layer.
        // Soft-reference and memcache layers existed as options but are disabled.
        ArrayList<VersionedCache<Integer, Document, Long>> docCaches = new ArrayList<VersionedCache<Integer, Document, Long>>();
        cacheGroup.docThreadLocalCache = new ThreadLocalCache<Integer, Document, Long>();
        docCaches.add(cacheGroup.docThreadLocalCache);
        docCaches.add(new SolbaseLRUCache<Integer, Document, Long>(500, "Document~" + indexName)); // 1024 * 500 = 500k
        cacheGroup.luceneDocument = new LayeredCache<Integer, Document, Long, ParsedDoc>(DOCUMENT_CACHE_TIMEOUT, docCaches);

        // Solr shard doc caching is intentionally left unset (shardDocument stays null).
        // Original note: maybe only a soft reference memory cache is needed
        // instead of memcache for solr docs.

        // Term vector caching: thread-local layer first, then an LRU layer.
        // Soft-reference and memcache layers existed as options but are disabled.
        ArrayList<VersionedCache<Term, CompactedTermDocMetadataArray, TermDocMetadataVersionIdentifier>> termDocMetadataCaches = new ArrayList<VersionedCache<Term, CompactedTermDocMetadataArray, TermDocMetadataVersionIdentifier>>();
        cacheGroup.termDocMetadatasLocalCache = new ThreadLocalCache<Term, CompactedTermDocMetadataArray, TermDocMetadataVersionIdentifier>();
        termDocMetadataCaches.add(cacheGroup.termDocMetadatasLocalCache);
        termDocMetadataCaches.add(new SolbaseLRUCache<Term, CompactedTermDocMetadataArray, TermDocMetadataVersionIdentifier>(250, "TV~" + indexName)); // 1024 * 250 = 250k
        cacheGroup.termDocMetadatas = new LayeredCache<Term, CompactedTermDocMetadataArray, TermDocMetadataVersionIdentifier, TermDocMetadata>(new TermDocMetadataLoader(), TERM_DOC_METADATA_CACHE_TIMEOUT, termDocMetadataCaches);

        return cacheGroup;
    }
}