/*****************************************************************************
* Copyright 2012 bitsofinfo.g [at] gmail [dot] com
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License
*
* Author: bitsofinfo.g [at] gmail [dot] com
* @see bitsofinfo.wordpress.com
*****************************************************************************/
package org.bitsofinfo.util.address.usps.ais.index;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.beanutils.PropertyUtils;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.springframework.beans.factory.annotation.Autowired;
import org.bitsofinfo.util.address.usps.ais.USPSRecord;
import org.bitsofinfo.util.address.usps.ais.USPSUtils;
public class LuceneIndexService implements USPSIndexService {
@Autowired
private USPSUtils uspsUtils;
// the root dir for all of our indexes
private File indexRootDir = null;
// if we are read only or not
private boolean readOnly = false;
private USPSRecordAnalyzer uspsRecordAnalyzer = new USPSRecordAnalyzer();
private HashMap<Class,LuceneIndex> indexMap = new HashMap<Class,LuceneIndex>();
private int indexCount = 0;
public void setIndexRootDir(String path) {
this.indexRootDir = new File(path);
}
private IndexWriter getIndexWriter(File targetDirPath,boolean optimizeForHeavyWriteOp, boolean create)
throws CorruptIndexException, LockObtainFailedException,IOException {
boolean autoCommit = true;
long ramBufferSize = 64;//mb
boolean compoundFile = true;
int mergeFactor = 20;
if (optimizeForHeavyWriteOp) {
autoCommit = false;
ramBufferSize = 100;//mb
compoundFile = false;
mergeFactor = mergeFactor * 2;
}
IndexWriter indexWriter = new IndexWriter(FSDirectory.open(targetDirPath),uspsRecordAnalyzer,create,MaxFieldLength.UNLIMITED);
indexWriter.setRAMBufferSizeMB(ramBufferSize);
indexWriter.setUseCompoundFile(compoundFile);
indexWriter.setMergeFactor(mergeFactor);
return indexWriter;
}
protected void indexRecords(List<USPSRecord> records) throws Exception {
//IndexWriter idxWriter = new IndexWriter();
}
protected Document uspsRecordToDocument(USPSRecord record) throws Exception {
// create a new document
Document doc = new Document();
// add the identifier field (stored, indexed but not analyzed)
Field idField = new Field("identifier",record.getIdentifier(),Field.Store.YES,Field.Index.NOT_ANALYZED);
// get all field names, and index these values
String[] fields2index = uspsUtils.getKeyFieldNames(record);
for (String fieldName : fields2index) {
Object rawVal = null;
try {
rawVal = PropertyUtils.getProperty(record, fieldName);
} catch(Exception e) {
// HANDLE THIS..(bad prop name etc).
rawVal = null;
}
// index/analyze the field value, do NOT store it
if (rawVal != null) {
Field f = new Field(fieldName,rawVal.toString(),Field.Store.NO,Field.Index.ANALYZED);
doc.add(f);
}
}
return doc;
}
public void initialize() {
try {
// ensure the indexer root dir exists
if (!indexRootDir.exists()) {
indexRootDir.mkdirs();
}
// create the storage structure for all our indexes
Set<Class> clazzes = uspsUtils.getUSPSRecordClasses();
for (Class clazz : clazzes) {
File baseDir = new File(indexRootDir.getAbsolutePath() + "/" + clazz.getSimpleName());
File repopDir = new File(baseDir.getAbsolutePath() + "/repopulate");
File activeDir = new File(baseDir.getAbsolutePath() + "/active");
if (!baseDir.exists()) {
baseDir.mkdirs();
}
if (!repopDir.exists()) {
repopDir.mkdirs();
this.getIndexWriter(repopDir, false, true); // create it
}
if (!activeDir.exists()) {
activeDir.mkdirs();
this.getIndexWriter(activeDir, false, true); // create it
}
indexMap.put(clazz,new LuceneIndex(activeDir,repopDir));
}
} catch(Exception e) {
}
}
@Override
public void index(List<USPSRecord> records) {
// sort em all out
HashMap<Class,List<USPSRecord>> clazzMap = new HashMap<Class,List<USPSRecord>>();
for (USPSRecord record : records) {
Class clazz = record.getClass();
List<USPSRecord> tmp = clazzMap.get(clazz);
if (tmp == null) {
tmp = new ArrayList<USPSRecord>();
clazzMap.put(clazz, tmp);
}
tmp.add(record);
}
for (Class clazz : clazzMap.keySet()) {
List<USPSRecord> items = clazzMap.get(clazz);
LuceneIndex index = indexMap.get(clazz);
IndexWriter writer = null;
try {
writer = this.getIndexWriter(index.activeDir, true, false);
for (USPSRecord r : items) {
Document doc = this.uspsRecordToDocument(r);
Term id = new Term("identifier",r.getIdentifier());
writer.updateDocument(id, doc, this.uspsRecordAnalyzer);
}
if (indexCount == 10) {
writer.optimize();
indexCount=0;
}
} catch(Exception e) {
} finally {
if (writer != null) {
try {writer.close();} catch(Exception ignoreForNow){}
}
}
}
indexCount++;
}
}