package org.ariadne_eu.metadata.insert; import java.io.ByteArrayInputStream; import java.io.IOException; import java.util.Date; import net.sourceforge.minor.lucene.core.indexer.IndexInserterDelegate; import org.apache.log4j.Logger; import org.apache.lucene.document.DateTools; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; import org.ariadne.config.PropertiesManager; import org.ariadne_eu.utils.config.RepositoryConstants; import org.ariadne_eu.utils.mace.MACEUtils; import org.eun.lucene.core.indexer.document.DocumentHandler; import org.eun.lucene.core.indexer.document.DocumentHandlerException; import org.eun.lucene.core.indexer.document.HandlerFactory; public class InsertDelegateSingleStringImpl implements IndexInserterDelegate { private static Logger log = Logger.getLogger(InsertDelegateSingleStringImpl.class); private String metadata; private String key; private String[] collection; public InsertDelegateSingleStringImpl(String _key, String _metadata, String[] _collection){ this.metadata = _metadata; this.key = _key; this.collection = _collection; } public void insert(IndexWriter writer) throws IOException { DocumentHandler handler = HandlerFactory.getDocumentHandlerImpl(); Document doc=null; try { String insertMetadata = metadata; if (metadata.startsWith("<?")) { insertMetadata = metadata.substring(metadata.indexOf("?>")+2); } doc = handler.getDocument(new ByteArrayInputStream(metadata.getBytes("UTF-8"))); doc.add(new Field("key", key, Field.Store.YES, Field.Index.NOT_ANALYZED )); for (String collection : this.collection) { doc.add(new Field("collection", collection.toLowerCase(), Field.Store.YES, Field.Index.NOT_ANALYZED )); } doc.add(new Field("date.insert", DateTools.dateToString(new Date(), DateTools.Resolution.MILLISECOND), Field.Store.YES, Field.Index.NOT_ANALYZED)); String luceneHandler = PropertiesManager.getInstance().getProperty(RepositoryConstants.getInstance().SR_LUCENE_HANDLER); if (luceneHandler.equalsIgnoreCase("org.ariadne_eu.utils.lucene.document.MACELOMHandler")) { MACEUtils.getClassification(); String exml = MACEUtils.enrichWClassification(insertMetadata); exml = exml.substring(38); //to remove the opening xml element doc.add(new Field("maceenrichedlom", exml, Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); } if (!luceneHandler.equalsIgnoreCase("org.ariadne_eu.utils.lucene.document.LOMLiteHandler")) { doc.add(new Field("md", insertMetadata, Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); } // writer.addDocument(doc); Term term = new Term("key", key); writer.setMergeFactor(15); writer.setRAMBufferSizeMB(48); writer.updateDocument(term, doc); } catch (DocumentHandlerException e) { log.error("insert: ", e); } } }