/* * OrTermsQuery.java * * Copyright (c) 2007-2011, The University of Sheffield. * * This file is part of GATE Mímir (see http://gate.ac.uk/family/mimir.html), * and is free software, licenced under the GNU Lesser General Public License, * Version 3, June 2007 (also included with this distribution as file * LICENCE-LGPL3.html). * * Valentin Tablan, 18 Jul 2012 * * $Id$ */ package gate.mimir.search.terms; import it.unimi.dsi.fastutil.ints.IntArrayList; import it.unimi.dsi.fastutil.objects.ObjectArrayList; import it.unimi.dsi.fastutil.objects.ObjectHeapSemiIndirectPriorityQueue; /** * Boolean OR operator for term queries. * The default count strategy used is * {@link AbstractCompoundTermsQuery.CompoundCountsStrategy#FIRST}. */ public class OrTermsQuery extends AbstractCompoundTermsQuery { /** * Serialization ID. */ private static final long serialVersionUID = 3293699315503739659L; /** * Constructs a new OR terms query. * @param stringsEnabled should terms strings be returned. * @param countsEnabled should term counts be returned. Counts are * accumulated across all sub-queries: the count for a term is the sum of all * counts for the same term in all sub-queries. * @param limit the maximum number of terms to be returned. * @param subQueries the term queries that form the disjunction. */ public OrTermsQuery(TermsQuery... subQueries) { super(subQueries); setCountsStrategy(AbstractCompoundTermsQuery.CompoundCountsStrategy.FIRST); } /* (non-Javadoc) * @see gate.mimir.search.terms.CompoundTermsQuery#combine(gate.mimir.search.terms.TermsResultSet[]) */ @Override public TermsResultSet combine(TermsResultSet... resSets) { return orResultsSets(resSets, countsStrategy); } /** * Given a set of {@link TermsResultSet} values, this method combines them * into a single {@link TermsResultSet} representing the disjunction of all * the provided results sets. * @param resSets * @return */ public static TermsResultSet orResultsSets(TermsResultSet[] resSets, AbstractCompoundTermsQuery.CompoundCountsStrategy countsStrategy) { if(countsStrategy == null) countsStrategy = AbstractCompoundTermsQuery.CompoundCountsStrategy.FIRST; String[] currentTerm = new String[resSets.length]; ObjectHeapSemiIndirectPriorityQueue<String> queue = new ObjectHeapSemiIndirectPriorityQueue<String>(currentTerm); int[] termIndex = new int[resSets.length]; boolean lengthsAvailable = true; boolean descriptionsAvailable = true; boolean origTermsAvailable = true; boolean countsAvailable = true; for(int i = 0; i < resSets.length; i++) { // this implementation requires that all sub-queries return terms in a // consistent order, so we sort them lexicographically by termString TermsResultSet.sortTermsResultSetByTermString(resSets[i]); if(resSets[i].termStrings.length > 0){ termIndex[i] = 0; currentTerm[i] = resSets[i].termStrings[termIndex[i]]; queue.enqueue(i); } // we need *all* sub-queries to provide lengths, because we don't know // which one will provide any of the results. if(resSets[i].termLengths == null) lengthsAvailable = false; if(resSets[i].termCounts == null) countsAvailable = false; if(resSets[i].termDescriptions == null) descriptionsAvailable = false; if(resSets[i].originalTermStrings == null) origTermsAvailable = false; } // prepare local data ObjectArrayList<String> termStrings = new ObjectArrayList<String>(); ObjectArrayList<String> termDescriptions = descriptionsAvailable ? new ObjectArrayList<String>() : null; ObjectArrayList<String[][]> origTerms = origTermsAvailable ? new ObjectArrayList<String[][]>() : null; IntArrayList termLengths = lengthsAvailable ? new IntArrayList() : null; IntArrayList termCounts = countsAvailable ? new IntArrayList() : null; int front[] = new int[resSets.length]; // enumerate all terms top:while(!queue.isEmpty()) { int first = queue.first(); String termString = resSets[first].termStrings[termIndex[first]]; termStrings.add(termString); if(lengthsAvailable) { termLengths.add(resSets[first].termLengths[termIndex[first]]); } if(descriptionsAvailable) { termDescriptions.add(resSets[first].termDescriptions[termIndex[first]]); } if(origTermsAvailable) { origTerms.add(resSets[first].originalTermStrings[termIndex[first]]); } if(countsAvailable) { // sum all counts int frontSize = queue.front(front); int[] counts = new int[frontSize]; for(int i = 0; i < frontSize; i++) { int subRunnerId = front[i]; counts[i]= resSets[subRunnerId].termCounts[termIndex[subRunnerId]]; } termCounts.add(AbstractCompoundTermsQuery.computeCompoundCount(counts, countsStrategy)); } // consume all equal terms while(resSets[first].termStrings[termIndex[first]].equals(termString)) { // advance this subRunner termIndex[first]++; if(termIndex[first] == resSets[first].termStrings.length) { // 'first' is out queue.dequeue(); if(queue.isEmpty()) break top; } else { currentTerm[first] = resSets[first].termStrings[termIndex[first]]; queue.changed(); } first = queue.first(); } } // construct the result TermsResultSet res = new TermsResultSet( termStrings.toArray(new String[termStrings.size()]), lengthsAvailable ? termLengths.toIntArray() : null, countsAvailable ? termCounts.toIntArray() : null, descriptionsAvailable ? termDescriptions.toArray(new String[termDescriptions.size()]) : null); if(origTermsAvailable){ res.originalTermStrings = origTerms.toArray( new String[origTerms.size()][][]); } return res; } }