package er.neo4jadaptor.query.lucene; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.neo4j.graphdb.Node; import org.neo4j.graphdb.PropertyContainer; import org.neo4j.graphdb.Relationship; import org.neo4j.graphdb.index.Index; import org.neo4j.graphdb.index.IndexHits; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.webobjects.eoaccess.EOAttribute; import com.webobjects.eoaccess.EOEntity; import com.webobjects.eoaccess.EORelationship; import com.webobjects.eocontrol.EOAndQualifier; import com.webobjects.eocontrol.EOKeyValueQualifier; import com.webobjects.eocontrol.EOOrQualifier; import com.webobjects.eocontrol.EOQualifier; import com.webobjects.foundation.NSArray; import er.neo4jadaptor.ersatz.lucene.LuceneTranslator; import er.neo4jadaptor.ersatz.neo4j.Neo4JTranslator; import er.neo4jadaptor.query.Results; import er.neo4jadaptor.query.lucene.results.LuceneIndexHits; /** * Consider EOQualifier: * <pre> * (((service.dvbOriginalNetworkId = 1536) and (service.dvbTransportStreamId = 2069) and (service.dvbServiceId = 19322)) and (endDateTime >= (com.webobjects.foundation.NSTimestamp)'2012-06-07 04:00:00 Etc/GMT')) * </pre> * * LuceneQueryConverter doesn't support relationships so it would convert it to: * <pre> * +(+(+#_type:WEPGEvent +#_type:WEPGEvent +#_type:WEPGEvent) +endDateTime:[2012060706:00:00:000 TO ZZZZ]) +#_type:WEPGEvent * </pre> * * which would return more results than actually matching the original query. We could optimize lucene query by first searching for services * matching * <pre> * dvbOriginalNetworkId = 1536 and dvbTransportStreamId = 2069 and dvbServiceId = 19322 * </pre> * and then replace * ((service.dvbOriginalNetworkId = 1536) and (service.dvbTransportStreamId = 2069) and (service.dvbServiceId = 19322)) * part with matching service IDs, so we could get in result something like: * +(+(+(+#_type:WEPGEvent +#_type:WEPGEvent +#_type:WEPGEvent) +endDateTime:[2012060706:00:00:000 TO ZZZZ]) +#_type:WEPGEvent) +(serviceId:00000000000000002026) * instead. * <p> * Due to the fact that optimization process makes another Lucene query which has a around-constant overhead, we perform optimization * attempt only if the initial number of results exceeds threshold of {@value #OPTIMIZATION_TRESHOLD}. * * TODO: refactor * * @author Jedrzej Sobanski * * @param <Type> */ public class LuceneOptimizer <Type extends PropertyContainer> { private static final Logger log = LoggerFactory.getLogger(LuceneOptimizer.class); /** * Perform optimization attempt only if there are more then this many results. */ public static final int OPTIMIZATION_TRESHOLD = 1000; private final Index<Type> index; public LuceneOptimizer(Index<Type> index) { this.index = index; } public static boolean canBeOptimized(IndexHits<? extends PropertyContainer> hits, EOQualifier qualifier) { return hits.size() > OPTIMIZATION_TRESHOLD && false == containsAlternatives(qualifier); } private static <Type extends PropertyContainer> long getObjectId(Type t) { if (t instanceof Node) { return ((Node) t).getId(); } else if (t instanceof Relationship) { return ((Relationship) t).getId(); } else { throw new IllegalArgumentException("Doesn't know type of object " + t); } } public Results<Type> optimize(Query q, EOEntity entity, EOQualifier qualifier) { Map<EORelationship, List<Type>> relToNodes = relationshipsToNodes(entity, qualifier); BooleanQuery boolQuery = new BooleanQuery(); IndexHits<Type> hits; // TODO: we make implicit assumption here that q is only conjunction boolQuery.add(q, Occur.MUST); for (EORelationship r : relToNodes.keySet()) { NSArray<EOAttribute> srcAtts = r.sourceAttributes(); BooleanQuery relationshipQuery = new BooleanQuery(); EOAttribute att; if (srcAtts.count() != 1) { throw new IllegalArgumentException(); } else { att = srcAtts.get(0); } for (Type n : relToNodes.get(r)) { Object ultimateId = Neo4JTranslator.instance.toNeutralValue(getObjectId(n), att); String luceneValue = LuceneTranslator.instance.fromNeutralValue(ultimateId, att); Term term = new Term(att.name(), luceneValue); TermQuery termQuery = new TermQuery(term); if (relationshipQuery.clauses().size() >= BooleanQuery.getMaxClauseCount()) { BooleanQuery.setMaxClauseCount(BooleanQuery.getMaxClauseCount() + 10); } relationshipQuery.add(termQuery, Occur.SHOULD); } boolQuery.add(relationshipQuery, Occur.MUST); } hits = index.query(boolQuery); log.debug("Querying lucene with {}.", q); return new LuceneIndexHits<>(hits); } private Map<EORelationship, List<Type>> relationshipsToNodes(EOEntity entity, EOQualifier qualifier) { Map<EORelationship, List<EOKeyValueQualifier>> relToQualifiers = new HashMap<EORelationship, List<EOKeyValueQualifier>>(); Map<EORelationship, List<Type>> relToNodes = new HashMap<EORelationship, List<Type>>(); LuceneQueryConverter luceneConverter = new LuceneQueryConverter(); collectUsedRelationships(relToQualifiers, entity, qualifier); for (EORelationship r : relToQualifiers.keySet()) { NSArray<EOQualifier> qualifiers = new NSArray<>(relToQualifiers.get(r)); Query luceneQuery; List<Type> nodes = new ArrayList<>(); luceneQuery = luceneConverter.fullQuery(r.destinationEntity(), new EOAndQualifier(qualifiers)); for (Type node : index.query(luceneQuery)) { nodes.add(node); } relToNodes.put(r, nodes); } return relToNodes; } private static boolean containsAlternatives(EOQualifier q) { if (q instanceof EOOrQualifier) { return true; } if (q instanceof EOAndQualifier) { for (EOQualifier q1 : ((EOAndQualifier) q).qualifiers()) { if (containsAlternatives(q1)) { return true; } } } return false; } /** * Qualifier MUST NOT contain any alternatives * @param q * @return * * TODO: it doesn't look good - supports only AND and key-value qualifiers */ private void collectUsedRelationships(Map<EORelationship, List<EOKeyValueQualifier>> result, EOEntity e, EOQualifier q) { if (q instanceof EOKeyValueQualifier) { EOKeyValueQualifier kvq = (EOKeyValueQualifier) q; String key = kvq.key(); String [] segments = key.split("\\."); if (segments.length == 2) { EORelationship r = e.relationshipNamed(segments[0]); if (r != null && false == r.isToMany() && false == r.isFlattened()) { List<EOKeyValueQualifier> list = result.get(r); if (list == null) { list = new ArrayList<>(); result.put(r, list); } list.add(new EOKeyValueQualifier(segments[1], kvq.selector(), kvq.value())); } } else { // ignore, too complex } } else if (q instanceof EOAndQualifier) { for (EOQualifier q1 : ((EOAndQualifier) q).qualifiers()) { collectUsedRelationships(result, e, q1); } } else { throw new IllegalArgumentException("Qualifiers different than " + EOAndQualifier.class.getSimpleName() + " or " + EOKeyValueQualifier.class.getSimpleName() + " are not supported"); } } }