/*
 *  AnnotationTermsQuery.java
 *
 *  Copyright (c) 2007-2011, The University of Sheffield.
 *
 *  This file is part of GATE Mímir (see http://gate.ac.uk/family/mimir.html),
 *  and is free software, licenced under the GNU Lesser General Public License,
 *  Version 3, June 2007 (also included with this distribution as file
 *  LICENCE-LGPL3.html).
 *
 *  Valentin Tablan, 13 Jul 2012
 *
 *  $Id$
 */
package gate.mimir.search.terms;

import gate.mimir.SemanticAnnotationHelper;
import gate.mimir.index.AtomicAnnotationIndex;
import gate.mimir.index.Mention;
import gate.mimir.search.IndexReaderPool;
import gate.mimir.search.QueryEngine;
import gate.mimir.search.query.AnnotationQuery;
import it.unimi.di.big.mg4j.index.IndexIterator;
import it.unimi.di.big.mg4j.search.DocumentIterator;
import it.unimi.di.big.mg4j.index.IndexReader;

import java.io.IOException;
import java.util.List;

import org.apache.log4j.Logger;

/**
 * A terms query driven by an {@link AnnotationQuery}: the terms it produces
 * are the URIs of the mentions that the semantic annotation helper reports
 * for that annotation query.
 */
public class AnnotationTermsQuery implements TermsQuery {

  /**
   * Serialization ID.
   */
  private static final long serialVersionUID = 777418229209857720L;

  private static final Logger logger =
      Logger.getLogger(AnnotationTermsQuery.class);

  /**
   * The annotation query whose matching mentions are returned as terms.
   */
  protected AnnotationQuery annotationQuery;

  /**
   * If set to true, term strings for annotation mentions are replaced with
   * their description (see
   * {@link SemanticAnnotationHelper#describeMention(String)}).
   */
  protected final boolean describeAnnotations;

  /**
   * If set to true, for each returned term (i.e. mention URI) the inverted
   * index is used to provide the number of occurrences.
   */
  protected final boolean countsEnabled;

  /**
   * Creates a terms query with both counts and annotation descriptions
   * switched off.
   *
   * @param annotationQuery the annotation query to resolve into terms.
   */
  public AnnotationTermsQuery(AnnotationQuery annotationQuery) {
    this(annotationQuery, false, false);
  }

  /**
   * Creates a terms query.
   *
   * @param annotationQuery the annotation query to resolve into terms.
   * @param countsEnabled whether occurrence counts should be collected for
   *          each term.
   * @param describeAnnotations whether a description should be obtained for
   *          each term.
   */
  public AnnotationTermsQuery(AnnotationQuery annotationQuery,
      boolean countsEnabled, boolean describeAnnotations) {
    this.annotationQuery = annotationQuery;
    this.countsEnabled = countsEnabled;
    this.describeAnnotations = describeAnnotations;
  }

  /**
   * Runs this query against the supplied engine.
   *
   * @param engine the query engine supplying the annotation helper and,
   *          when counts are enabled, the annotation index.
   * @return {@link TermsResultSet#EMPTY} when the helper reports no
   *         mentions; otherwise a result set holding, per mention, its URI
   *         and length, plus counts (only when {@link #countsEnabled}, else
   *         <code>null</code>) and descriptions (only when
   *         {@link #describeAnnotations}, else <code>null</code>).
   * @throws IOException propagated from the helper or the index reader.
   */
  @Override
  public TermsResultSet execute(QueryEngine engine) throws IOException {
    // the helper in charge of this query's annotation type
    SemanticAnnotationHelper helper =
        engine.getAnnotationHelper(annotationQuery);

    // let the helper resolve the query constraints into mentions
    long startedAt = System.currentTimeMillis();
    List<Mention> mentions = helper.getMentions(
        annotationQuery.getAnnotationType(),
        annotationQuery.getConstraints(), engine);
    logger.debug(mentions.size() + " mentions obtained in "
        + (System.currentTimeMillis() - startedAt) + " ms");

    final int mentionCount = mentions.size();
    if(mentionCount <= 0) {
      return TermsResultSet.EMPTY;
    }

    String[] uris = new String[mentionCount];
    String[] descriptions = null;
    if(describeAnnotations) {
      descriptions = new String[mentionCount];
    }
    int[] occurrences = null;
    IndexReader reader = null;
    try {
      if(countsEnabled) {
        // the reader is only opened when counts were actually requested
        occurrences = new int[mentionCount];
        AtomicAnnotationIndex annotationIndex =
            engine.getAnnotationIndex(annotationQuery.getAnnotationType());
        reader = annotationIndex.getIndex().getReader();
      }
      int[] lengths = new int[mentionCount];
      int position = 0;
      for(Mention mention : mentions) {
        String uri = mention.getUri();
        uris[position] = uri;
        lengths[position] = mention.getLength();
        if(countsEnabled) {
          occurrences[position] = countOccurrences(reader, uri);
        }
        if(describeAnnotations) {
          descriptions[position] = helper.describeMention(uri);
        }
        position++;
      }
      return new TermsResultSet(uris, lengths, occurrences, descriptions);
    } finally {
      if(reader != null) {
        reader.close();
      }
    }
  }

  /**
   * Adds up the per-document counts that the index iterator reports for the
   * given term.
   *
   * @param reader an open reader over the annotation index.
   * @param term the term (mention URI) to look up.
   * @return the sum of the counts over all documents the iterator returns.
   * @throws IOException propagated from the reader or the iterator.
   */
  private static int countOccurrences(IndexReader reader, String term)
      throws IOException {
    IndexIterator postings = reader.documents(term);
    int total = 0;
    while(postings.nextDocument() != DocumentIterator.END_OF_LIST) {
      total += postings.count();
    }
    return total;
  }
}