/* * Licensed under the Apache License, Version 2.0 (the "License"); * * You may not use this file except in compliance with the License. * * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * * See the License for the specific language governing permissions and * limitations under the License. * * Contributions from 2013-2017 where performed either by US government * employees, or under US Veterans Health Administration contracts. * * US Veterans Health Administration contributions by government employees * are work of the U.S. Government and are not subject to copyright * protection in the United States. Portions contributed by government * employees are USGovWork (17USC ยง105). Not subject to copyright. * * Contribution by contractors to the US Veterans Health Administration * during this period are contractually contributed under the * Apache License, Version 2.0. * * See: https://www.usa.gov/government-works * * Contributions prior to 2013: * * Copyright (C) International Health Terminology Standards Development Organisation. * Licensed under the Apache License, Version 2.0. * */ package sh.isaac.provider.query.lucene.indexers; //~--- JDK imports ------------------------------------------------------------ import java.io.IOException; import java.util.HashSet; import java.util.Iterator; import java.util.List; import javax.inject.Inject; //~--- non-JDK imports -------------------------------------------------------- import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.lucene.document.Document; import org.apache.lucene.document.DoubleField; import org.apache.lucene.document.Field; import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.FloatField; import org.apache.lucene.document.IntField; import org.apache.lucene.document.LongField; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.NumericRangeQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.glassfish.hk2.runlevel.RunLevel; import org.jvnet.hk2.annotations.Service; import sh.isaac.api.Get; import sh.isaac.api.chronicle.ObjectChronology; import sh.isaac.api.collections.ConceptSequenceSet; import sh.isaac.api.component.sememe.SememeChronology; import sh.isaac.api.component.sememe.SememeType; import sh.isaac.api.component.sememe.version.ComponentNidSememe; import sh.isaac.api.component.sememe.version.DynamicSememe; import sh.isaac.api.component.sememe.version.LogicGraphSememe; import sh.isaac.api.component.sememe.version.LongSememe; import sh.isaac.api.component.sememe.version.StringSememe; import sh.isaac.api.component.sememe.version.dynamicSememe.DynamicSememeData; import sh.isaac.api.component.sememe.version.dynamicSememe.dataTypes.DynamicSememeArray; import sh.isaac.api.component.sememe.version.dynamicSememe.dataTypes.DynamicSememeBoolean; import sh.isaac.api.component.sememe.version.dynamicSememe.dataTypes.DynamicSememeByteArray; import sh.isaac.api.component.sememe.version.dynamicSememe.dataTypes.DynamicSememeDouble; import sh.isaac.api.component.sememe.version.dynamicSememe.dataTypes.DynamicSememeFloat; import sh.isaac.api.component.sememe.version.dynamicSememe.dataTypes.DynamicSememeInteger; import sh.isaac.api.component.sememe.version.dynamicSememe.dataTypes.DynamicSememeLong; import sh.isaac.api.component.sememe.version.dynamicSememe.dataTypes.DynamicSememeNid; import sh.isaac.api.component.sememe.version.dynamicSememe.dataTypes.DynamicSememePolymorphic; import sh.isaac.api.component.sememe.version.dynamicSememe.dataTypes.DynamicSememeSequence; import sh.isaac.api.component.sememe.version.dynamicSememe.dataTypes.DynamicSememeString; import sh.isaac.api.component.sememe.version.dynamicSememe.dataTypes.DynamicSememeUUID; import sh.isaac.api.index.SearchResult; import sh.isaac.api.logic.LogicNode; import sh.isaac.api.tree.TreeNodeVisitData; import sh.isaac.model.sememe.dataTypes.DynamicSememeLongImpl; import sh.isaac.model.sememe.dataTypes.DynamicSememeNidImpl; import sh.isaac.model.sememe.dataTypes.DynamicSememeStringImpl; import sh.isaac.provider.query.lucene.LuceneIndexer; import sh.isaac.provider.query.lucene.PerFieldAnalyzer; //~--- classes ---------------------------------------------------------------- /** * This class provides indexing for all String, Nid, Long and Logic Graph sememe types. * * Additionally, this class provides flexible indexing of all DynamicSememe data types. * * @author kec * @author <a href="mailto:daniel.armbrust.list@gmail.com">Dan Armbrust</a> */ @Service(name = "sememe indexer") @RunLevel(value = 2) public class SememeIndexer extends LuceneIndexer { /** The Constant log. */ private static final Logger log = LogManager.getLogger(); /** The Constant INDEX_NAME. */ public static final String INDEX_NAME = "sememes"; /** The Constant COLUMN_FIELD_DATA. */ private static final String COLUMN_FIELD_DATA = "colData"; //~--- fields -------------------------------------------------------------- /** The lric. */ @Inject private SememeIndexerConfiguration lric; //~--- constructors -------------------------------------------------------- /** * Instantiates a new sememe indexer. * * @throws IOException Signals that an I/O exception has occurred. */ private SememeIndexer() throws IOException { // For HK2 super(INDEX_NAME); } //~--- methods ------------------------------------------------------------- /** * Search for matches to the specified nid. Note that in the current implementation, you will only find matches to sememes * of type {@link SememeType#COMPONENT_NID} or {@link SememeType#LOGIC_GRAPH}. * * This only supports nids, not sequences. * * If searching a component nid sememe, this will only match on the attached component nid value. It will not match * on the assemblage concept, nor the referenced component nid. Those can be found directly via standard sememe APIs. * If searching a logic graph sememe, it will find a match in any concept that is involved in the graph, except for the * root concept. * * @param nid the id reference to search for * @param sememeConceptSequence the sememe concept sequence * @param searchColumns (optional) limit the search to the specified columns of attached data. May ONLY be provided if * ONE and only one sememeConceptSequence is provided. May not be provided if 0 or more than 1 sememeConceptSequence values are provided. * @param sizeLimit The maximum size of the result list. * @param targetGeneration target generation that must be included in the search or Long.MIN_VALUE if there is no need * to wait for a target generation. Long.MAX_VALUE can be passed in to force this query to wait until any in-progress * indexing operations are completed - and then use the latest index. * @return a List of {@code SearchResult} that contains the nid of the component that matched, and the score of that * match relative to other matches. Note that scores are pointless for exact id matches - they will all be the same. */ public List<SearchResult> query(int nid, Integer[] sememeConceptSequence, Integer[] searchColumns, int sizeLimit, Long targetGeneration) { final Query q = new QueryWrapperForColumnHandling() { @Override Query buildQuery(String columnName) { return new TermQuery(new Term(columnName + PerFieldAnalyzer.WHITE_SPACE_FIELD_MARKER, nid + "")); } }.buildColumnHandlingQuery(sememeConceptSequence, searchColumns); return search(restrictToSememe(q, sememeConceptSequence), sizeLimit, targetGeneration, null); } /** * A convenience method. * * Search DynamicSememeData columns, treating them as text - and handling the search in the same mechanism as if this were a * call to the method {@link LuceneIndexer#query(String, boolean, Integer, int, long)} * * Calls the method {@link #query(DynamicSememeDataBI, Integer, boolean, Integer[], int, long) with a null parameter for * the searchColumns, and wraps the queryString into a DynamicSememeString. * * @param queryString the query string * @param prefixSearch the prefix search * @param sememeConceptSequence the sememe concept sequence * @param sizeLimit the size limit * @param targetGeneration the target generation * @return the list */ @Override public final List<SearchResult> query(String queryString, boolean prefixSearch, Integer[] sememeConceptSequence, int sizeLimit, Long targetGeneration) { return query(new DynamicSememeStringImpl(queryString), prefixSearch, sememeConceptSequence, null, sizeLimit, targetGeneration); } /** * Query. * * @param queryData - The query data object (string, int, etc) * @param prefixSearch see {@link LuceneIndexer#query(String, boolean, ComponentProperty, int, Long)} for a description. Only applicable * when the queryData type is string. Ignored for all other data types. * @param sememeConceptSequence (optional) limit the search to the specified assemblage * @param searchColumns (optional) limit the search to the specified columns of attached data. May ONLY be provided if * ONE and only one sememeConceptSequence is provided. May not be provided if 0 or more than 1 sememeConceptSequence values are provided. * @param sizeLimit the size limit * @param targetGeneration (optional) wait for an index to build, or null to not wait * @return the list */ // TODO fix this limitation on the column restriction... public final List<SearchResult> query(final DynamicSememeData queryData, final boolean prefixSearch, Integer[] sememeConceptSequence, Integer[] searchColumns, int sizeLimit, Long targetGeneration) { Query q = null; if (queryData instanceof DynamicSememeString) { q = new QueryWrapperForColumnHandling() { @Override Query buildQuery(String columnName) { // This is the only query type that needs tokenizing, etc. String queryString = ((DynamicSememeString) queryData).getDataString(); // '-' signs are operators to lucene... but we want to allow nid lookups. So escape any leading hyphens // and any hyphens that are preceeded by spaces. This way, we don't mess up UUID handling. // (lucene handles UUIDs ok, because the - sign is only treated special at the beginning, or when preceeded by a space) if (queryString.startsWith("-")) { queryString = "\\" + queryString; } queryString = queryString.replaceAll("\\s-", " \\\\-"); log.debug("Modified search string is: ''{}''", queryString); return buildTokenizedStringQuery(queryString, columnName, prefixSearch); } }.buildColumnHandlingQuery(sememeConceptSequence, searchColumns); } else { if ((queryData instanceof DynamicSememeBoolean) || (queryData instanceof DynamicSememeNid) || (queryData instanceof DynamicSememeUUID)) { q = new QueryWrapperForColumnHandling() { @Override Query buildQuery(String columnName) { return new TermQuery(new Term(columnName + PerFieldAnalyzer.WHITE_SPACE_FIELD_MARKER, queryData.getDataObject().toString())); } }.buildColumnHandlingQuery(sememeConceptSequence, searchColumns); } else if ((queryData instanceof DynamicSememeDouble) || (queryData instanceof DynamicSememeFloat) || (queryData instanceof DynamicSememeInteger) || (queryData instanceof DynamicSememeLong) || (queryData instanceof DynamicSememeSequence)) { q = new QueryWrapperForColumnHandling() { @Override Query buildQuery(String columnName) { Query temp = buildNumericQuery(queryData, true, queryData, true, columnName); if (((queryData instanceof DynamicSememeLong) && ((DynamicSememeLong) queryData).getDataLong() < 0) || ((queryData instanceof DynamicSememeInteger) && ((DynamicSememeInteger) queryData).getDataInteger() < 0)) { // Looks like a nid... wrap in an or clause that would do a match on the exact term if it was indexed as a nid, rather than a numeric final BooleanQuery wrapper = new BooleanQuery(); wrapper.add(new TermQuery(new Term(columnName, queryData.getDataObject().toString())), Occur.SHOULD); wrapper.add(temp, Occur.SHOULD); temp = wrapper; } return temp; } }.buildColumnHandlingQuery(sememeConceptSequence, searchColumns); } else if (queryData instanceof DynamicSememeByteArray) { throw new RuntimeException("DynamicSememeByteArray isn't indexed"); } else if (queryData instanceof DynamicSememePolymorphic) { throw new RuntimeException("This should have been impossible (polymorphic?)"); } else if (queryData instanceof DynamicSememeArray) { throw new RuntimeException("DynamicSememeArray isn't a searchable type"); } else { log.error("This should have been impossible (no match on col type)"); throw new RuntimeException("unexpected error, see logs"); } } return search(restrictToSememe(q, sememeConceptSequence), sizeLimit, targetGeneration, null); } /** * Query numeric range. * * @param queryDataLower the query data lower * @param queryDataLowerInclusive the query data lower inclusive * @param queryDataUpper the query data upper * @param queryDataUpperInclusive the query data upper inclusive * @param sememeConceptSequence (optional) limit the search to the specified assemblage * @param searchColumns (optional) limit the search to the specified columns of attached data. May ONLY be provided if * ONE and only one sememeConceptSequence is provided. May not be provided if 0 or more than 1 sememeConceptSequence values are provided. * @param sizeLimit the size limit * @param targetGeneration (optional) wait for an index to build, or null to not wait * @return the list */ public final List<SearchResult> queryNumericRange(final DynamicSememeData queryDataLower, final boolean queryDataLowerInclusive, final DynamicSememeData queryDataUpper, final boolean queryDataUpperInclusive, Integer[] sememeConceptSequence, Integer[] searchColumns, int sizeLimit, Long targetGeneration) { final Query q = new QueryWrapperForColumnHandling() { @Override Query buildQuery(String columnName) { return buildNumericQuery(queryDataLower, queryDataLowerInclusive, queryDataUpper, queryDataUpperInclusive, columnName); } }.buildColumnHandlingQuery(sememeConceptSequence, searchColumns); return search(restrictToSememe(q, sememeConceptSequence), sizeLimit, targetGeneration, null); } /** * Adds the fields. * * @param chronicle the chronicle * @param doc the doc */ @Override protected void addFields(ObjectChronology<?> chronicle, Document doc) { final SememeChronology<?> sememeChronology = (SememeChronology<?>) chronicle; doc.add(new TextField(FIELD_SEMEME_ASSEMBLAGE_SEQUENCE, sememeChronology.getAssemblageSequence() + "", Field.Store.NO)); for (final Object sv: sememeChronology.getVersionList()) { if (sv instanceof DynamicSememe) { final DynamicSememe<?> dsv = (DynamicSememe<?>) sv; final Integer[] columns = this.lric.whatColumnsToIndex(dsv.getAssemblageSequence()); if (columns != null) { final int dataColCount = dsv.getData().length; for (final int col: columns) { final DynamicSememeData dataCol = (col >= dataColCount) ? null : dsv.getData(col); // Only pass in a column number if we were asked to index more than one column for this sememe handleType(doc, dataCol, (columns.length > 1) ? col : -1); } } } // TODO enhance the index configuration to allow us to configure Static sememes as indexed, or not indexed // static sememe types are never more than 1 column, always pass -1 else if (sv instanceof StringSememe) { final StringSememe<?> ssv = (StringSememe<?>) sv; handleType(doc, new DynamicSememeStringImpl(ssv.getString()), -1); incrementIndexedItemCount("Sememe String"); } else if (sv instanceof LongSememe) { final LongSememe<?> lsv = (LongSememe<?>) sv; handleType(doc, new DynamicSememeLongImpl(lsv.getLongValue()), -1); incrementIndexedItemCount("Sememe Long"); } else if (sv instanceof ComponentNidSememe) { final ComponentNidSememe<?> csv = (ComponentNidSememe<?>) sv; handleType(doc, new DynamicSememeNidImpl(csv.getComponentNid()), -1); incrementIndexedItemCount("Sememe Component Nid"); } else if (sv instanceof LogicGraphSememe) { final LogicGraphSememe<?> lgsv = (LogicGraphSememe<?>) sv; final ConceptSequenceSet css = new ConceptSequenceSet(); lgsv.getLogicalExpression().processDepthFirst((LogicNode logicNode,TreeNodeVisitData data) -> { logicNode.addConceptsReferencedByNode(css); }); css.stream().forEach(sequence -> { handleType( doc, new DynamicSememeNidImpl(Get.identifierService().getConceptNid(sequence)), -1); }); } else { log.error("Unexpected type handed to addFields in Sememe Indexer: " + sememeChronology.toString()); } } // Due to indexing all of the versions, we may have added duplicate field name/value combinations to the document. // Remove the dupes. final Iterator<IndexableField> it = doc.iterator(); final HashSet<String> uniqueFields = new HashSet<>(); while (it.hasNext()) { final IndexableField field = it.next(); final String temp = field.name() + "::" + field.stringValue(); if (uniqueFields.contains(temp)) { it.remove(); } else { uniqueFields.add(temp); } } } /** * Index chronicle. * * @param chronicle the chronicle * @return true, if successful */ @Override protected boolean indexChronicle(ObjectChronology<?> chronicle) { if (chronicle instanceof SememeChronology<?>) { final SememeChronology<?> sememeChronology = (SememeChronology<?>) chronicle; if ((sememeChronology.getSememeType() == SememeType.DYNAMIC) || (sememeChronology.getSememeType() == SememeType.STRING) || (sememeChronology.getSememeType() == SememeType.LONG) || (sememeChronology.getSememeType() == SememeType.COMPONENT_NID) || (sememeChronology.getSememeType() == SememeType.LOGIC_GRAPH)) { return true; } } return false; } /** * Builds the numeric query. * * @param queryDataLower the query data lower * @param queryDataLowerInclusive the query data lower inclusive * @param queryDataUpper the query data upper * @param queryDataUpperInclusive the query data upper inclusive * @param columnName the column name * @return the query */ private Query buildNumericQuery(DynamicSememeData queryDataLower, boolean queryDataLowerInclusive, DynamicSememeData queryDataUpper, boolean queryDataUpperInclusive, String columnName) { // Convert both to the same type (if they differ) - go largest data type to smallest, so we don't lose precision // Also - if they pass in longs that would fit in an int, also generate an int query. // likewise, with Double - if they pass in a double, that would fit in a float, also generate a float query. try { final BooleanQuery bq = new BooleanQuery(); boolean fitsInFloat = false; boolean fitsInInt = false; if ((queryDataLower instanceof DynamicSememeDouble) || (queryDataUpper instanceof DynamicSememeDouble)) { final Double upperVal = ((queryDataUpper == null) ? null : ((queryDataUpper instanceof DynamicSememeDouble) ? ((DynamicSememeDouble) queryDataUpper).getDataDouble() : ((Number) queryDataUpper.getDataObject()).doubleValue())); final Double lowerVal = ((queryDataLower == null) ? null : ((queryDataLower instanceof DynamicSememeDouble) ? ((DynamicSememeDouble) queryDataLower).getDataDouble() : ((Number) queryDataLower.getDataObject()).doubleValue())); bq.add(NumericRangeQuery.newDoubleRange(columnName, lowerVal, upperVal, queryDataLowerInclusive, queryDataUpperInclusive), Occur.SHOULD); if (((upperVal != null) && (upperVal <= Float.MAX_VALUE) && (upperVal >= Float.MIN_VALUE)) || ((lowerVal != null) && (lowerVal <= Float.MAX_VALUE) && (lowerVal >= Float.MIN_VALUE))) { fitsInFloat = true; } } if (fitsInFloat || (queryDataLower instanceof DynamicSememeFloat) || (queryDataUpper instanceof DynamicSememeFloat)) { final Float upperVal = ((queryDataUpper == null) ? null : ((queryDataUpper == null) ? null : ((queryDataUpper instanceof DynamicSememeFloat) ? ((DynamicSememeFloat) queryDataUpper).getDataFloat() : ((fitsInFloat && ((Number) queryDataUpper.getDataObject()).doubleValue() > Float.MAX_VALUE) ? Float.MAX_VALUE : ((Number) queryDataUpper.getDataObject()).floatValue())))); final Float lowerVal = ((queryDataLower == null) ? null : ((queryDataLower instanceof DynamicSememeFloat) ? ((DynamicSememeFloat) queryDataLower).getDataFloat() : ((fitsInFloat && ((Number) queryDataLower.getDataObject()).doubleValue() < Float.MIN_VALUE) ? Float.MIN_VALUE : ((Number) queryDataLower.getDataObject()).floatValue()))); bq.add(NumericRangeQuery.newFloatRange(columnName, lowerVal, upperVal, queryDataLowerInclusive, queryDataUpperInclusive), Occur.SHOULD); } if ((queryDataLower instanceof DynamicSememeLong) || (queryDataUpper instanceof DynamicSememeLong)) { final Long upperVal = ((queryDataUpper == null) ? null : ((queryDataUpper instanceof DynamicSememeLong) ? ((DynamicSememeLong) queryDataUpper).getDataLong() : ((Number) queryDataUpper.getDataObject()).longValue())); final Long lowerVal = ((queryDataLower == null) ? null : ((queryDataLower instanceof DynamicSememeLong) ? ((DynamicSememeLong) queryDataLower).getDataLong() : ((Number) queryDataLower.getDataObject()).longValue())); bq.add(NumericRangeQuery.newLongRange(columnName, lowerVal, upperVal, queryDataLowerInclusive, queryDataUpperInclusive), Occur.SHOULD); if (((upperVal != null) && (upperVal <= Integer.MAX_VALUE) && (upperVal >= Integer.MIN_VALUE)) || ((lowerVal != null) && (lowerVal <= Integer.MAX_VALUE) && (lowerVal >= Integer.MIN_VALUE))) { fitsInInt = true; } } if (fitsInInt || (queryDataLower instanceof DynamicSememeInteger) || (queryDataUpper instanceof DynamicSememeInteger) || (queryDataLower instanceof DynamicSememeSequence) || (queryDataUpper instanceof DynamicSememeSequence)) { final Integer upperVal = ((queryDataUpper == null) ? null : ((queryDataUpper instanceof DynamicSememeInteger) ? ((DynamicSememeInteger) queryDataUpper).getDataInteger() : ((queryDataUpper instanceof DynamicSememeSequence) ? ((DynamicSememeSequence) queryDataUpper).getDataSequence() : ((fitsInInt && ((Number) queryDataUpper.getDataObject()).longValue() > Integer.MAX_VALUE) ? Integer.MAX_VALUE : ((Number) queryDataUpper.getDataObject()).intValue())))); final Integer lowerVal = ((queryDataLower == null) ? null : ((queryDataLower instanceof DynamicSememeInteger) ? ((DynamicSememeInteger) queryDataLower).getDataInteger() : ((queryDataLower instanceof DynamicSememeSequence) ? ((DynamicSememeSequence) queryDataLower).getDataSequence() : ((fitsInInt && ((Number) queryDataLower.getDataObject()).longValue() < Integer.MIN_VALUE) ? Integer.MIN_VALUE : ((Number) queryDataLower.getDataObject()).intValue())))); bq.add(NumericRangeQuery.newIntRange(columnName, lowerVal, upperVal, queryDataLowerInclusive, queryDataUpperInclusive), Occur.SHOULD); } if (bq.getClauses().length == 0) { throw new RuntimeException("Not a numeric data type - can't perform a range query"); } else { final BooleanQuery must = new BooleanQuery(); must.add(bq, Occur.MUST); return must; } } catch (final ClassCastException e) { throw new RuntimeException("One of the values is not a numeric data type - can't perform a range query"); } } /** * Handle type. * * @param doc the doc * @param dataCol the data col * @param colNumber the col number */ private void handleType(Document doc, DynamicSememeData dataCol, int colNumber) { // Not the greatest design for diskspace / performance... but if we want to be able to support searching across // all fields / all sememes - and also support searching per-field within a single sememe, we need to double index // all of the data. Once with a standard field name, and once with a field name that includes the column number. // at search time, restricting to certain field matches is only allowed if they are also restricting to an assemblage, // so we can compute the correct field number list at search time. // Note, we optimize by only doing the double indexing in cases where the sememe has more than one column to begin with. // At query time, we construct the query appropriately to handle this optimization. // the cheaper option from a disk space perspective (maybe, depending on the data) would be to create a document per // column. The queries would be trivial to write then, but we would be duplicating the component nid and assemblage nid // in each document, which is also expensive. It also doesn't fit the model in OTF, of a document per component. // We also duplicate again, on string fields by indexing with the white space analyzer, in addition to the normal one. if (dataCol == null) { // noop } else if (dataCol instanceof DynamicSememeBoolean) { doc.add(new StringField(COLUMN_FIELD_DATA + PerFieldAnalyzer.WHITE_SPACE_FIELD_MARKER, ((DynamicSememeBoolean) dataCol).getDataBoolean() + "", Store.NO)); if (colNumber >= 0) { doc.add(new StringField(COLUMN_FIELD_DATA + "_" + colNumber + PerFieldAnalyzer.WHITE_SPACE_FIELD_MARKER, ((DynamicSememeBoolean) dataCol).getDataBoolean() + "", Store.NO)); } incrementIndexedItemCount("Dynamic Sememe Boolean"); } else if (dataCol instanceof DynamicSememeByteArray) { log.warn("Sememe Indexer configured to index a field that isn''t indexable (byte array)"); } else if (dataCol instanceof DynamicSememeDouble) { doc.add(new DoubleField(COLUMN_FIELD_DATA, ((DynamicSememeDouble) dataCol).getDataDouble(), Store.NO)); if (colNumber >= 0) { doc.add(new DoubleField(COLUMN_FIELD_DATA + "_" + colNumber, ((DynamicSememeDouble) dataCol).getDataDouble(), Store.NO)); } incrementIndexedItemCount("Dynamic Sememe Double"); } else if (dataCol instanceof DynamicSememeFloat) { doc.add(new FloatField(COLUMN_FIELD_DATA, ((DynamicSememeFloat) dataCol).getDataFloat(), Store.NO)); if (colNumber >= 0) { doc.add(new FloatField(COLUMN_FIELD_DATA + "_" + colNumber, ((DynamicSememeFloat) dataCol).getDataFloat(), Store.NO)); } incrementIndexedItemCount("Dynamic Sememe Float"); } else if (dataCol instanceof DynamicSememeInteger) { doc.add(new IntField(COLUMN_FIELD_DATA, ((DynamicSememeInteger) dataCol).getDataInteger(), Store.NO)); if (colNumber >= 0) { doc.add(new IntField(COLUMN_FIELD_DATA + "_" + colNumber, ((DynamicSememeInteger) dataCol).getDataInteger(), Store.NO)); } incrementIndexedItemCount("Dynamic Sememe Integer"); } else if (dataCol instanceof DynamicSememeSequence) { doc.add(new IntField(COLUMN_FIELD_DATA, ((DynamicSememeSequence) dataCol).getDataSequence(), Store.NO)); if (colNumber >= 0) { doc.add(new IntField(COLUMN_FIELD_DATA + "_" + colNumber, ((DynamicSememeSequence) dataCol).getDataSequence(), Store.NO)); } incrementIndexedItemCount("Dynamic Sememe Sequence"); } else if (dataCol instanceof DynamicSememeLong) { doc.add(new LongField(COLUMN_FIELD_DATA, ((DynamicSememeLong) dataCol).getDataLong(), Store.NO)); if (colNumber >= 0) { doc.add(new LongField(COLUMN_FIELD_DATA + "_" + colNumber, ((DynamicSememeLong) dataCol).getDataLong(), Store.NO)); } incrementIndexedItemCount("Dynamic Sememe Long"); } else if (dataCol instanceof DynamicSememeNid) { // No need for ranges on a nid, no need for tokenization (so textField, instead of string field). doc.add(new StringField(COLUMN_FIELD_DATA + PerFieldAnalyzer.WHITE_SPACE_FIELD_MARKER, ((DynamicSememeNid) dataCol).getDataNid() + "", Store.NO)); if (colNumber >= 0) { doc.add(new StringField(COLUMN_FIELD_DATA + "_" + colNumber + PerFieldAnalyzer.WHITE_SPACE_FIELD_MARKER, ((DynamicSememeNid) dataCol).getDataNid() + "", Store.NO)); } incrementIndexedItemCount("Dynamic Sememe Nid"); } else if (dataCol instanceof DynamicSememePolymorphic) { log.error("This should have been impossible (polymorphic?)"); } else if (dataCol instanceof DynamicSememeString) { doc.add(new TextField(COLUMN_FIELD_DATA, ((DynamicSememeString) dataCol).getDataString(), Store.NO)); if (colNumber >= 0) { doc.add(new TextField(COLUMN_FIELD_DATA + "_" + colNumber, ((DynamicSememeString) dataCol).getDataString(), Store.NO)); } // yes, indexed 4 different times - twice with the standard analyzer, twice with the whitespace analyzer. doc.add(new TextField(COLUMN_FIELD_DATA + PerFieldAnalyzer.WHITE_SPACE_FIELD_MARKER, ((DynamicSememeString) dataCol).getDataString(), Store.NO)); if (colNumber >= 0) { doc.add(new TextField(COLUMN_FIELD_DATA + "_" + colNumber + PerFieldAnalyzer.WHITE_SPACE_FIELD_MARKER, ((DynamicSememeString) dataCol).getDataString(), Store.NO)); } incrementIndexedItemCount("Dynamic Sememe String"); } else if (dataCol instanceof DynamicSememeUUID) { // Use the whitespace analyzer on UUIDs doc.add(new StringField(COLUMN_FIELD_DATA + PerFieldAnalyzer.WHITE_SPACE_FIELD_MARKER, ((DynamicSememeUUID) dataCol).getDataUUID().toString(), Store.NO)); if (colNumber >= 0) { doc.add(new StringField(COLUMN_FIELD_DATA + "_" + colNumber + PerFieldAnalyzer.WHITE_SPACE_FIELD_MARKER, ((DynamicSememeUUID) dataCol).getDataUUID().toString(), Store.NO)); } incrementIndexedItemCount("Dynamic Sememe UUID"); } else if (dataCol instanceof DynamicSememeArray<?>) { for (final DynamicSememeData nestedData: ((DynamicSememeArray<?>) dataCol).getDataArray()) { handleType(doc, nestedData, colNumber); } } else { log.error("This should have been impossible (no match on col type) {}", dataCol); } } //~--- inner classes ------------------------------------------------------- /** * The Class QueryWrapperForColumnHandling. */ private abstract class QueryWrapperForColumnHandling { /** * Builds the query. * * @param columnName the column name * @return the query */ abstract Query buildQuery(String columnName); /** * Builds the column handling query. * * @param sememeConceptSequence the sememe concept sequence * @param searchColumns the search columns * @return the query */ protected Query buildColumnHandlingQuery(Integer[] sememeConceptSequence, Integer[] searchColumns) { Integer[] sememeIndexedColumns = null; if ((searchColumns != null) && (searchColumns.length > 0)) { // If they provide a search column - then they MUST provide one and only one sememeConceptSequence if ((sememeConceptSequence == null) || (sememeConceptSequence.length != 1)) { throw new RuntimeException( "If a list of search columns is provided, then the sememeConceptSequence variable must contain 1 (and only 1) sememe"); } else { sememeIndexedColumns = SememeIndexer.this.lric.whatColumnsToIndex(sememeConceptSequence[0]); } } // If only 1 column was indexed from a sememe, we don't create field specific columns. if ((searchColumns == null) || (searchColumns.length == 0) || (sememeIndexedColumns == null) || (sememeIndexedColumns.length < 2)) { return buildQuery(COLUMN_FIELD_DATA); } else // If they passed a specific column to search AND the dynamic sememe type has more than 1 indexed column, then do a column specific search. { final BooleanQuery group = new BooleanQuery(); for (final int i: searchColumns) { group.add(buildQuery(COLUMN_FIELD_DATA + "_" + i), Occur.SHOULD); } return group; } } } }