package no.difi.datahotel.logic;

import no.difi.datahotel.model.FieldLight;
import no.difi.datahotel.model.Metadata;
import no.difi.datahotel.util.CSVReader;
import no.difi.datahotel.util.Filesystem;
import no.difi.datahotel.util.MetadataLogger;
import no.difi.datahotel.util.Timestamp;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import java.io.File;
import java.util.HashSet;
import java.util.Map;
import java.util.logging.Level;
import static no.difi.datahotel.util.Filesystem.*;
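
/**
 * Builds and maintains the Lucene full-text index for a dataset.
 *
 * Spring-managed component ("index"): callers normally get it injected and
 * invoke {@link #update(Metadata)} after a dataset has been refreshed, or
 * {@link #delete(String)} when a dataset is removed. Index files are written
 * to the FOLDER_CACHE_INDEX area of the filesystem cache.
 */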
@Component("index")
public class IndexBean {
public static Version version = Version.LUCENE_34;
@SuppressWarnings("rawtypes")
public static StandardAnalyzer analyzer = new StandardAnalyzer(version, new HashSet());
@Autowired
private FieldBean fieldBean;
private CSVReader csvReaderFactory = new CSVReader();
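
    /**
     * Removes the cached index directory for the given dataset location.
     */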
    public void delete(String location) {
        Filesystem.delete(FOLDER_CACHE_INDEX, location);
    }
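
    /**
     * (Re)builds the index for the given dataset if its data has changed
     * since the last indexing run. Every CSV row becomes one Lucene document;
     * values of searchable fields are additionally concatenated into a
     * combined "searchable" field.
     */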
@SuppressWarnings("rawtypes")
public void update(Metadata metadata) {
MetadataLogger logger = metadata.getLogger();
Timestamp ts = new Timestamp(FOLDER_CACHE_INDEX, metadata.getLocation(), "timestamp");
if (metadata.getUpdated() == ts.getTimestamp()) {
logger.info("Index up to date.");
return;
}
logger.info("Building index.");
long i = 0;
        try {
            File filename = Filesystem.getFile(FOLDER_SLAVE, metadata.getLocation(), FILE_DATASET);
            Directory dir = FSDirectory.open(Filesystem.getFolder(FOLDER_CACHE_INDEX, metadata.getLocation()));

            // Rebuild the index from scratch, reusing the shared analyzer.
            IndexWriterConfig writerConfig = new IndexWriterConfig(version, analyzer);
            IndexWriter writer = new IndexWriter(dir, writerConfig);
            writer.deleteAll();

            CSVReader csv = csvReaderFactory.open(filename);
            while (csv.hasNext()) {
                try {
                    i++;
                    Map<String, String> line = csv.getNextLine();
                    Document doc = new Document();
                    String searchable = "";

                    for (FieldLight f : fieldBean.getFields(metadata)) {
                        String value = line.get(f.getShortName());
                        if (value == null) {
                            // Column missing in this row; skip the field instead of failing the whole row.
                            logger.info("Field not found: " + f.getShortName());
                            continue;
                        }

                        // TODO if (f.getGroupable())
                        // Purely numeric values are indexed verbatim; everything else is analyzed.
                        if (value.matches("[0-9.,]+"))
                            doc.add(new Field(f.getShortName(), value, Store.YES, Index.NOT_ANALYZED_NO_NORMS));
                        else
                            doc.add(new Field(f.getShortName(), value, Store.YES, Index.ANALYZED));

                        if (f.getSearchable())
                            searchable += " " + value;
                    }

                    if (!searchable.trim().isEmpty())
                        doc.add(new Field("searchable", searchable.trim(), Store.NO, Index.ANALYZED));

                    writer.addDocument(doc);
                } catch (Exception e) {
                    StackTraceElement origin = e.getStackTrace().length > 0 ? e.getStackTrace()[0] : null;
                    logger.info("[" + e.getClass().getSimpleName()
                            + (origin != null ? "][" + origin.getFileName() + ":" + origin.getLineNumber() : "")
                            + "] Unable to index line " + i + ". (" + e.getMessage() + ")");
                }

                if (i % 10000 == 0)
                    logger.info("Document " + i);
            }
            // Merge segments, flush everything to disk and release resources.
            writer.optimize();
            writer.commit();
            writer.close();
            dir.close();

            // Remember which dataset revision this index was built from.
            ts.setTimestamp(metadata.getUpdated());
            ts.save();
        } catch (Exception e) {
            logger.log(Level.WARNING, e.getMessage(), e);
        }
    }
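
    /**
     * Setter for manual wiring of the field bean, e.g. from tests.
     */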
    public void setFieldBean(FieldBean fieldBean) {
        this.fieldBean = fieldBean;
    }
}