package com.tyndalehouse.step.core.data.entities.impl;

import static com.tyndalehouse.step.core.utils.StringUtils.isBlank;

import java.io.File;
import java.io.IOException;
import java.util.*;
import java.util.Map.Entry;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.crosswire.common.util.CollectionUtil;
import org.joda.time.LocalDateTime;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.tyndalehouse.step.core.data.EntityConfiguration;
import com.tyndalehouse.step.core.data.EntityManager;
import com.tyndalehouse.step.core.data.FieldConfig;
import com.tyndalehouse.step.core.data.create.PostProcessor;
import com.tyndalehouse.step.core.exceptions.StepInternalException;

/**
 * Reads a file and creates the equivalent Lucene index for it. Documents are accumulated in an in-memory
 * directory and copied to the configured on-disk location when {@link #close()} is called. This class is
 * not thread safe.
 *
 * @author chrisburrell
 *
 */
public class EntityIndexWriterImpl {
    private static final Logger LOGGER = LoggerFactory.getLogger(EntityIndexWriterImpl.class);

    /** in-memory directory that receives every document until {@link #close()} is called */
    private final Directory ramDirectory;
    /** writer targeting {@link #ramDirectory} */
    private IndexWriter writer;
    /** field configurations, keyed by the name of each raw data field they are fed from */
    private final Map<String, List<FieldConfig>> luceneFieldConfigurationByRaw;
    private final EntityConfiguration config;
    /** the document currently being built, or null if none has been started since the last save */
    private Document doc;
    private final EntityManager manager;

    /**
     * Responsible for writing items to an index.
     *
     * @param entityManager the manager from which the configuration of the entity is obtained
     * @param entityName the name of the entity
     * @throws StepInternalException if the in-memory index writer cannot be created
     */
    // we specifically allow a method to be overridden for testing purposes.
    @SuppressWarnings("PMD")
    public EntityIndexWriterImpl(final EntityManager entityManager, final String entityName) {
        this.manager = entityManager;
        this.config = entityManager.getConfig(entityName);

        final Map<String, FieldConfig> luceneFieldConfiguration = this.config.getLuceneFieldConfiguration();
        this.luceneFieldConfigurationByRaw = new HashMap<String, List<FieldConfig>>(
                luceneFieldConfiguration.size());

        // key the map by its data fields: one raw field may feed several Lucene fields
        for (final Entry<String, FieldConfig> entry : luceneFieldConfiguration.entrySet()) {
            final FieldConfig fieldConfig = entry.getValue();
            for (final String rawName : fieldConfig.getRawDataField()) {
                List<FieldConfig> configs = this.luceneFieldConfigurationByRaw.get(rawName);
                if (configs == null) {
                    configs = new ArrayList<FieldConfig>(1);
                    this.luceneFieldConfigurationByRaw.put(rawName, configs);
                }
                configs.add(fieldConfig);
            }
        }

        this.ramDirectory = getNewRamDirectory();
        try {
            this.writer = new IndexWriter(this.ramDirectory, this.config.getAnalyzerInstance(),
                    MaxFieldLength.UNLIMITED);
        } catch (final IOException e) {
            throw new StepInternalException("Unable to initialise creation of index", e);
        }
    }

    /**
     * @return a new ram directory
     */
    Directory getNewRamDirectory() {
        return new RAMDirectory();
    }

    /**
     * Writes the index to the relevant file location, then asks the entity manager to refresh the entity.
     * The on-disk directory is always released, and the on-disk writer is rolled back if the copy fails.
     *
     * @return the number of entries in the index
     * @throws StepInternalException if the index cannot be written
     */
    public int close() {
        // read the count first, while the RAM writer is still open
        final int numEntries = getNumEntriesInIndex();
        final File file = new File(this.config.getLocation());

        try {
            // we've finished writing entries now, so close our writer
            this.writer.close();

            // open up a location on disk
            final Directory destination = FSDirectory.open(file);
            try {
                copyRamIndexTo(destination);
            } finally {
                destination.close();
            }

            this.ramDirectory.close();
            this.manager.refresh(this.config.getName());
        } catch (final IOException e) {
            throw new StepInternalException("Unable to write index", e);
        }
        return numEntries;
    }

    /**
     * Creates a fresh index in the given directory and copies the in-memory index into it.
     *
     * @param destination the directory that receives the index
     * @throws IOException if the index cannot be created, filled, optimized or closed
     */
    private void copyRamIndexTo(final Directory destination) throws IOException {
        final IndexWriter fsWriter = new IndexWriter(destination, this.config.getAnalyzerInstance(), true,
                IndexWriter.MaxFieldLength.UNLIMITED);
        boolean committed = false;
        try {
            fsWriter.addIndexesNoOptimize(new Directory[] { this.ramDirectory });
            fsWriter.optimize();
            fsWriter.close();
            committed = true;
        } finally {
            if (!committed) {
                // release the writer without committing a partially copied index; a failure here is only
                // logged so that the exception which caused the rollback is the one that propagates
                try {
                    fsWriter.rollback();
                } catch (final IOException rollbackFailure) {
                    LOGGER.warn("Unable to roll back index writer", rollbackFailure);
                }
            }
        }
    }

    /**
     * @return the writer of the index into RAM
     */
    IndexWriter getRamWriter() {
        return this.writer;
    }

    /**
     * @return the number of entries in index
     */
    int getNumEntriesInIndex() {
        return this.writer.maxDoc();
    }

    /**
     * Adds a field to the current document. Null values are ignored, as are raw fields for which no
     * configuration is registered.
     *
     * @param fieldName the name of the raw data field
     * @param fieldValue the field value
     */
    public void addFieldToCurrentDocument(final String fieldName, final Number fieldValue) {
        if (fieldValue == null) {
            return;
        }

        for (final FieldConfig fieldConfig : startDocumentAndFindConfigs(fieldName)) {
            this.doc.add(fieldConfig.getField(fieldValue));
        }
    }

    /**
     * Adds a field to the current document. Null values are ignored, as are raw fields for which no
     * configuration is registered.
     *
     * @param fieldName the name of the raw data field
     * @param fieldValue the field value
     */
    public void addFieldToCurrentDocument(final String fieldName, final LocalDateTime fieldValue) {
        if (fieldValue == null) {
            return;
        }

        for (final FieldConfig fieldConfig : startDocumentAndFindConfigs(fieldName)) {
            this.doc.add(fieldConfig.getField(fieldValue));
        }
    }

    /**
     * Adds a field to the current document. Blank values are ignored, as are raw fields for which no
     * configuration is registered. When a configuration is marked as appending and the document already
     * holds that field, the new value is appended to the existing one, separated by a space.
     *
     * @param fieldName the name of the raw data field
     * @param fieldValue the field value
     */
    public void addFieldToCurrentDocument(final String fieldName, final String fieldValue) {
        if (isBlank(fieldValue)) {
            return;
        }

        for (final FieldConfig fieldConfig : startDocumentAndFindConfigs(fieldName)) {
            // check if we've got the field already...
            // if so, then we'll simply append to the existing data, as we don't want
            // to be storing stuff in different fields...
            final Field existingValue = this.doc.getField(fieldConfig.getName());
            if (existingValue != null && fieldConfig.isAppend()) {
                existingValue.setValue(existingValue.stringValue() + " " + fieldValue);
            } else {
                // otherwise, either add for the first time, or add multiple times
                this.doc.add(fieldConfig.getField(fieldValue));
            }
        }
    }

    /**
     * Makes sure a current document exists, then looks up the configurations fed by the given raw field.
     * The document is created before the lookup, so it exists even when the field ends up being skipped.
     *
     * @param fieldName the name of the raw data field
     * @return the matching configurations, or an empty list if the field is to be skipped
     */
    private List<FieldConfig> startDocumentAndFindConfigs(final String fieldName) {
        ensureNewDocument();

        final List<FieldConfig> fieldConfigs = this.luceneFieldConfigurationByRaw.get(fieldName);
        if (fieldConfigs == null || fieldConfigs.isEmpty()) {
            LOGGER.trace("Skipping field: [{}]", fieldName);
            return Collections.<FieldConfig> emptyList();
        }
        return fieldConfigs;
    }

    /** Creates a document if it doesn't already exist */
    private void ensureNewDocument() {
        if (this.doc == null) {
            this.doc = new Document();
        }
    }

    /**
     * Saves the current document, by running the post-processor (if one is configured and a document
     * exists) and adding the document to the index.
     *
     * @throws StepInternalException if the document cannot be written
     */
    public void save() {
        final PostProcessor postProcessorInstance = this.config.getPostProcessorInstance();
        if (postProcessorInstance != null && this.doc != null) {
            postProcessorInstance.process(this.config, this.doc);
        }

        addDocument();
    }

    /**
     * Adds the current document, if any, to the index and clears it so that the next field starts a new
     * document.
     */
    @SuppressWarnings("PMD")
    private void addDocument() {
        try {
            if (this.doc != null) {
                this.writer.addDocument(this.doc);
                this.doc = null;
            }
        } catch (final IOException e) {
            throw new StepInternalException("Unable to write document", e);
        }
    }

    /**
     * @return the entity name
     */
    String getEntityName() {
        return this.config.getName();
    }
}