/* * Geotoolkit - An Open Source Java GIS Toolkit * http://www.geotoolkit.org * * (C) 2007 - 2009, Geomatys * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 3 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. */ package org.geotoolkit.lucene.index; import java.io.IOException; import java.nio.file.DirectoryStream; import java.sql.SQLException; import java.util.*; import java.util.logging.Level; // JTS dependencies import com.vividsolutions.jts.geom.*; import java.nio.file.Files; import java.nio.file.Path; // Apache Lucene dependencies import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.StoredField; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.Term; import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.Directory; import org.apache.sis.util.ArgumentChecks; import org.geotoolkit.geometry.jts.JTS; // Geotoolkit dependencies import org.geotoolkit.index.tree.StoreIndexException; import org.geotoolkit.index.tree.TreeElementMapper; import org.geotoolkit.index.tree.manager.NamedEnvelope; import org.geotoolkit.index.tree.manager.SQLRtreeManager; import org.geotoolkit.io.wkb.WKBUtils; import org.geotoolkit.index.IndexingException; import org.geotoolkit.lucene.LuceneUtils; import org.geotoolkit.lucene.filter.LuceneOGCFilter; import org.geotoolkit.nio.IOUtilities; import org.geotoolkit.util.collection.CloseableIterator; import org.opengis.geometry.MismatchedReferenceSystemException; // Types dependencies import org.opengis.referencing.crs.CoordinateReferenceSystem; import org.opengis.referencing.operation.TransformException; import org.opengis.util.FactoryException; /** * An abstract lucene Indexer used to create and writer lucene index. * * @author Mehdi Sidhoum * @author Guilhem Legal * @param <E> The object's type to insert in a document. * @module */ public abstract class AbstractIndexer<E> extends IndexLucene { protected static final String CORRUPTED_SINGLE_MSG = "CorruptIndexException while indexing document: "; protected static final String CORRUPTED_MULTI_MSG = "CorruptIndexException while indexing document: "; protected static final String LOCK_SINGLE_MSG = "LockObtainException while indexing document: "; protected static final String LOCK_MULTI_MSG = "LockObtainException while indexing documents."; protected static final String IO_SINGLE_MSG = "IOException while indexing document: "; /** * A flag use in child constructor. */ private boolean needCreation; /** * A flag to stop the indexation going on */ protected static boolean stopIndexing = false; /** * A list of services id */ protected static final List<String> indexationToStop = new ArrayList<>(); /** * Map of fieldName / Number type. */ private final Map<String, String> numericFields = new HashMap<>(); /** * Build a new Indexer witch create an index in the specified directory, * with the specified analyzer. * * @param indexID * @param configDirectory * @param analyzer */ public AbstractIndexer(final String indexID, final Path configDirectory, final Analyzer analyzer) { super(analyzer); ArgumentChecks.ensureNonNull("indexID", indexID); ArgumentChecks.ensureNonNull("configDirectory", configDirectory); try { // we get the last index directory long maxTime = 0; Path currentIndexDirectory = null; if (Files.exists(configDirectory) && Files.isDirectory(configDirectory)) { try (final DirectoryStream<Path> dirStream = Files.newDirectoryStream(configDirectory)) { for (Path indexDirectory : dirStream) { String suffix = indexDirectory.getFileName().toString(); suffix = suffix.substring(suffix.lastIndexOf('-') + 1); try { long currentTime = Long.parseLong(suffix); if (currentTime > maxTime) { maxTime = currentTime; currentIndexDirectory = indexDirectory; } } catch (NumberFormatException ex) { LOGGER.log(Level.WARNING, "Unable to parse the timestamp:{0}", suffix); } } } } if (currentIndexDirectory == null) { currentIndexDirectory = configDirectory.resolve(indexID + "index-" + System.currentTimeMillis()); Files.createDirectories(currentIndexDirectory); needCreation = true; setFileDirectory(currentIndexDirectory); } else { LOGGER.finer("Index already created."); deleteOldIndexDir(configDirectory, indexID, currentIndexDirectory.getFileName().toString()); // must be set before reading tree setFileDirectory(currentIndexDirectory); needCreation = false; } rTree = SQLRtreeManager.get(currentIndexDirectory, this); } catch (IOException ex) { throw new RuntimeException(ex); } } /** * Build a new Indexer witch create an index in the specified directory, * with a Keyword analyzer. * * @param indexID * @param configDirectory */ public AbstractIndexer(final String indexID, final Path configDirectory) { this(indexID, configDirectory, null); } public boolean needCreation() { return needCreation; } /** * Replace the precedent index directory by another pre-generated. */ private void deleteOldIndexDir(final Path configDirectory, final String serviceID, final String currentDirName) throws IOException { try (final DirectoryStream<Path> dirStream = Files.newDirectoryStream(configDirectory)) { for (Path indexDirectory : dirStream) { if (isIndexDir(indexDirectory, serviceID)) { final String dirName = indexDirectory.getFileName().toString(); if (!dirName.equals(currentDirName)) { IOUtilities.deleteRecursively(indexDirectory); } } } } } protected abstract Collection<String> getAllIdentifiers() throws IndexingException; protected abstract Iterator<String> getIdentifierIterator() throws IndexingException; protected abstract Iterator<E> getEntryIterator() throws IndexingException; protected abstract boolean useEntryIterator(); protected abstract E getEntry(final String identifier) throws IndexingException; /** * Create a new Index with the specified list of object. * * @param toIndex objects to index. * @throws IndexingException */ public void createIndex(final List<E> toIndex) throws IndexingException { LOGGER.log(logLevel, "Creating lucene index for please wait..."); final long time = System.currentTimeMillis(); int nbEntries = 0; try { final IndexWriterConfig conf = new IndexWriterConfig(analyzer); final IndexWriter writer = new IndexWriter(LuceneUtils.getAppropriateDirectory(getFileDirectory()), conf); final String serviceID = getServiceID(); resetTree(); nbEntries = toIndex.size(); for (E entry : toIndex) { if (!stopIndexing && !indexationToStop.contains(serviceID)) { indexDocument(writer, entry); } else { LOGGER.info("Index creation stopped after " + (System.currentTimeMillis() - time) + " ms for service:" + serviceID); stopIndexation(writer, serviceID); return; } } // writer.optimize(); no longer justified writer.close(); // we store the numeric fields in a properties file int the index directory storeNumericFieldsFile(); } catch (IOException | StoreIndexException | SQLException ex) { LOGGER.log(Level.WARNING, IO_SINGLE_MSG, ex); } LOGGER.log(logLevel, "Index creation process in " + (System.currentTimeMillis() - time) + " ms\n" + " documents indexed: " + nbEntries); } /** * Create a new Index. * * @throws IndexingException */ public void createIndex() throws IndexingException { LOGGER.log(logLevel, "(light memory) Creating lucene index please wait..."); final long time = System.currentTimeMillis(); int nbEntries = 0; try { final IndexWriterConfig conf = new IndexWriterConfig(analyzer); final IndexWriter writer = new IndexWriter(LuceneUtils.getAppropriateDirectory(getFileDirectory()), conf); final String serviceID = getServiceID(); resetTree(); LOGGER.log(logLevel, "starting indexing..."); if (useEntryIterator()) { final Iterator<E> entries = getEntryIterator(); while (entries.hasNext()) { if (!stopIndexing && !indexationToStop.contains(serviceID)) { final E entry = entries.next(); indexDocument(writer, entry); nbEntries++; } else { LOGGER.info("Index creation stopped after " + (System.currentTimeMillis() - time) + " ms for service:" + serviceID); stopIndexation(writer, serviceID); return; } } if (entries instanceof CloseableIterator) { ((CloseableIterator)entries).close(); } } else { final Iterator<String> identifiers = getIdentifierIterator(); while (identifiers.hasNext()) { final String identifier = identifiers.next(); if (!stopIndexing && !indexationToStop.contains(serviceID)) { try { final E entry = getEntry(identifier); indexDocument(writer, entry); nbEntries++; } catch (IndexingException ex) { LOGGER.log(Level.WARNING,"Metadata IO exeption while indexing metadata: " + identifier + " " + ex.getMessage() + "\nmove to next metadata...",ex); } } else { LOGGER.info("Index creation stopped after " + (System.currentTimeMillis() - time) + " ms for service:" + serviceID); stopIndexation(writer, serviceID); return; } } if (identifiers instanceof CloseableIterator) { ((CloseableIterator)identifiers).close(); } } // writer.optimize(); no longer justified writer.close(); // we store the numeric fields in a properties file int the index directory storeNumericFieldsFile(); } catch (IOException | StoreIndexException | SQLException ex) { LOGGER.log(Level.SEVERE,IO_SINGLE_MSG + "{0}", ex.getMessage()); throw new IndexingException("IOException while indexing documents:" + ex.getMessage(), ex); } LOGGER.log(logLevel, "Index creation process in " + (System.currentTimeMillis() - time) + " ms\n documents indexed: " + nbEntries + "."); } /** * Index a document from the specified object with the specified index writer. * Used when indexing in line many document. * * @param writer An Lucene index writer. * @param meta The object to index. */ public void indexDocument(final IndexWriter writer, final E meta) throws IndexingException, IOException { final int docId = writer.maxDoc(); //adding the document in a specific model. in this case we use a MDwebDocument. writer.addDocument(createDocument(meta, docId)); LOGGER.log(Level.FINER, "Metadata: {0} indexed", getIdentifier(meta)); } /** * This method add to index of lucene a new document. * * @param meta The object to index. */ public void indexDocument(final E meta) { try { final IndexWriterConfig config = new IndexWriterConfig(analyzer); final IndexWriter writer = new IndexWriter(LuceneUtils.getAppropriateDirectory(getFileDirectory()), config); final int docId = writer.maxDoc(); //adding the document in a specific model. in this case we use a MDwebDocument. writer.addDocument(createDocument(meta, docId)); LOGGER.log(Level.FINER, "Metadata: {0} indexed", getIdentifier(meta)); writer.close(); if (rTree != null) { rTree.getTreeElementMapper().flush(); rTree.flush(); } } catch (IndexingException | StoreIndexException ex) { LOGGER.log(Level.WARNING, "Error while indexing single document", ex); } catch (IOException ex) { LOGGER.log(Level.WARNING, IO_SINGLE_MSG + ex.getMessage(), ex); } } /** * Return the identifier of the metadata * * @param metadata * @return */ protected abstract String getIdentifier(E metadata); /** * This method stop all the current indexation running */ public static void stopIndexation() { stopIndexing = true; } private void stopIndexation(final IndexWriter writer, final String serviceID) throws IOException { // writer.optimize(); no longer justified writer.close(); IOUtilities.deleteRecursively(getFileDirectory()); if (indexationToStop.contains(serviceID)) { indexationToStop.remove(serviceID); } if (indexationToStop.isEmpty()) { stopIndexing = false; } } /** * Store the numeric fields in a properties file int the index directory */ protected void storeNumericFieldsFile() { final Path indexDirectory = getFileDirectory(); final Path numericFieldFile = indexDirectory.resolve("numericFields.properties"); final Properties prop = new Properties(); prop.putAll(numericFields); try { IOUtilities.storeProperties(prop, numericFieldFile, null); } catch (IOException ex) { LOGGER.log(Level.WARNING, "Unable to store the numeric fields properties file.", ex); } } /** * Add a numeric fields to the current list. * * @param fieldName * @param numberType */ protected void addNumericField(final String fieldName, final Character numberType) { if (numericFields.get(fieldName) == null) { numericFields.put(fieldName, numberType.toString()); } } /** * This method remove index of lucene a document identified by identifier. * * @param identifier */ public void removeDocument(final String identifier) { try { final Directory dir = LuceneUtils.getAppropriateDirectory(getFileDirectory()); final Term t = new Term("id", identifier); final TermQuery query = new TermQuery(t); LOGGER.log(logLevel, "Term query:{0}", query); // look for DOC ID for R-Tree removal final NamedEnvelope env = new NamedEnvelope(getTreeCrs(), identifier); final TreeElementMapper<NamedEnvelope> mapper = rTree.getTreeElementMapper(); final int treeID = mapper.getTreeIdentifier(env); if (treeID != -1) { final NamedEnvelope realEnv = mapper.getObjectFromTreeIdentifier(treeID); boolean removed = rTree.remove(realEnv); if (!removed) { LOGGER.log(Level.WARNING, "unable to remove envelope for:{0}", identifier); } else { //remove from mapper mapper.setTreeIdentifier(null, treeID); mapper.flush(); rTree.flush(); } } final IndexWriterConfig config = new IndexWriterConfig(analyzer); final IndexWriter writer = new IndexWriter(dir, config); writer.deleteDocuments(query); LOGGER.log(logLevel, "Metadata: {0} removed from the index", identifier); writer.commit(); writer.close(); } catch (CorruptIndexException ex) { LOGGER.log(Level.WARNING, "CorruptIndexException while indexing document: " + ex.getMessage(), ex); } catch (IOException ex) { LOGGER.log(Level.WARNING, "IOException while indexing document: " + ex.getMessage(), ex); } catch (StoreIndexException ex) { LOGGER.log(Level.WARNING, "StoreIndexException while indexing document: " + ex.getMessage(), ex); } } /** * Makes a document from the specified object. * * @param object an object to index. * @return A Lucene document. */ protected abstract Document createDocument(E object, int docId) throws IndexingException; /** * Add a geometric field with on ore more boundingBox object in the specified lucene document. * * @param doc The lucene document currently building. * @param minx a list of minimal X coordinate. * @param maxx a list of maximal X coordinate. * @param miny a list of minimal Y coordinate. * @param maxy a list of maximal Y coordinate. * @param crs coordinate spatial reference. */ protected void addBoundingBox(final Document doc, final List<Double> minx, final List<Double> maxx, final List<Double> miny, final List<Double> maxy, final CoordinateReferenceSystem crs) { final Polygon[] polygons = LuceneUtils.getPolygons(minx, maxx, miny, maxy, crs); Geometry geom; if (polygons.length == 1) { geom = polygons[0]; } else if (polygons.length > 1 ){ geom = LuceneUtils.GF.createGeometryCollection(polygons); JTS.setCRS(geom, crs); } else { return; } addGeometry(doc, geom, getTreeCrs()); } /** * Add a geometric field with a JTS geometry in the specified lucene document. * @param doc The lucene document currently building. * @param geom A JTS geometry */ public NamedEnvelope addGeometry(final Document doc, final Geometry geom, final CoordinateReferenceSystem crs) { NamedEnvelope namedBound = null; try { final String id = doc.get("id"); namedBound = LuceneUtils.getNamedEnvelope(id, geom, crs); rTree.insert(namedBound); rTree.getTreeElementMapper().flush(); rTree.flush(); } catch (TransformException | FactoryException | MismatchedReferenceSystemException | StoreIndexException | IOException ex) { LOGGER.log(Level.WARNING, "Unable to insert envelope in R-Tree.", ex); } doc.add(new StoredField(LuceneOGCFilter.GEOMETRY_FIELD_NAME,WKBUtils.toWKBwithSRID(geom))); return namedBound; } /** * Free the resources. */ @Override public void destroy() { super.destroy(); } /** * This method stop all the current indexation running */ public static void stopIndexation(final List<String> ids) { stopIndexing = true; if (ids != null) { for (String id: ids) { indexationToStop.add(id); } } } /** * Return the service ID of this index or "" if there is not explicit service ID. * * @return the service ID of this index or "" if there is not explicit service ID. */ protected String getServiceID() { final Path directory = getFileDirectory(); final String directoryName = directory.getFileName().toString(); final String serviceId; if (directoryName.contains("index")) { serviceId = directoryName.substring(0, directoryName.indexOf("index")); } else { serviceId = ""; } return serviceId; } }