/*
* QueryTests.java
*
* Copyright (c) 2007-2011, The University of Sheffield.
*
* This file is part of GATE Mímir (see http://gate.ac.uk/family/mimir.html),
* and is free software, licenced under the GNU Lesser General Public License,
* Version 3, June 2007 (also included with this distribution as file
* LICENCE-LGPL3.html).
*
* $Id$
*/
package gate.mimir.test;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import gate.Document;
import gate.Gate;
import gate.mimir.AbstractSemanticAnnotationHelper;
import gate.mimir.IndexConfig;
import gate.mimir.MimirIndex;
import gate.mimir.SemanticAnnotationHelper;
import gate.mimir.index.IndexException;
import gate.mimir.search.QueryEngine;
import gate.mimir.search.query.AndQuery;
import gate.mimir.search.query.AnnotationQuery;
import gate.mimir.search.query.Binding;
import gate.mimir.search.query.GapQuery;
import gate.mimir.search.query.OrQuery;
import gate.mimir.search.query.QueryExecutor;
import gate.mimir.search.query.QueryNode;
import gate.mimir.search.query.RepeatsQuery;
import gate.mimir.search.query.SequenceQuery;
import gate.mimir.search.query.TermQuery;
import gate.mimir.search.query.WithinQuery;
import gate.mimir.search.query.parser.ParseException;
import gate.mimir.search.query.parser.QueryParser;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Logger;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
/**
* A class with tests for the various Mimir query operators.
*/
public class QueryTests {
/**
* Directory (given as a relative path) into which the query result files
* are written.
*/
private static final String resultsPath = "reports/query-results";
/**
* The platform line separator; occurrences of it in hit text are replaced
* with a space before the text is written to a result file.
*/
private static final String NEW_LINE = System.getProperty("line.separator");
/**
* Logger used to report the first difference found when comparing result
* files.
*/
private static Logger logger = Logger.getLogger(QueryTests.class.getName());
/**
* Class names of the semantic annotation helpers to test. Read from the
* <code>helpers.to.test</code> system property as a comma-separated list
* (whitespace around the commas is ignored); defaults to
* <code>gate.mimir.db.DBSemanticAnnotationHelper</code>.
*/
public static final String[] helperTypes = System.getProperty(
"helpers.to.test", "gate.mimir.db.DBSemanticAnnotationHelper").split(
"\\s*,\\s*");
/**
* The indexes being tested against: one directory per entry in
* {@link #helperTypes}, populated by the one-time set-up.
*/
private static File[] indexDirs;
/**
 * Initialises GATE, registers the plugins needed by the tests and builds one
 * index per helper type listed in {@link #helperTypes}.
 *
 * Each index is created in a fresh temporary location and populated with
 * every non-directory entry of <code>data/gatexml-output.zip</code>, loaded
 * as a UTF-8 GATE document through a <code>jar:</code> URL.
 *
 * @throws Exception if GATE cannot be initialised, a helper class cannot be
 *           loaded, or a document cannot be read or indexed.
 */
@BeforeClass
public static void oneTimeSetUp() throws Exception {
  Gate.setGateHome(new File("gate-home"));
  Gate.setUserConfigFile(new File("gate-home/user-gate.xml"));
  Gate.init();
  // load the tokeniser plugin
  Gate.getCreoleRegister().registerDirectories(new File("gate-home/plugins/ANNIE-tokeniser").toURI().toURL());
  // load the DB plugin
  Gate.getCreoleRegister().registerDirectories(new File("../plugins/db-h2").toURI().toURL());
  // load the measurements plugin
  Gate.getCreoleRegister().registerDirectories(new File("../plugins/measurements").toURI().toURL());
  indexDirs = new File[helperTypes.length];
  for(int i = 0; i < helperTypes.length; i++) {
    // createTempFile is only used to reserve a unique name: the file itself
    // is removed again so that the path is free for the new index
    indexDirs[i] = File.createTempFile("mimir-index", null);
    indexDirs[i].delete();
    IndexConfig indexConfig = TestUtils.getTestIndexConfig(indexDirs[i],
        Class.forName(helperTypes[i], true, Gate.getClassLoader()).asSubclass(
            AbstractSemanticAnnotationHelper.class));
    // now start indexing the documents
    MimirIndex index = new MimirIndex(indexConfig);
    String pathToZipFile = "data/gatexml-output.zip";
    File zipFile = new File(pathToZipFile);
    String fileURI = zipFile.toURI().toString();
    // the ZipFile is only used to enumerate the entry names; the documents
    // themselves are loaded through jar: URLs
    ZipFile zip = new ZipFile(pathToZipFile);
    try {
      Enumeration<? extends ZipEntry> entries = zip.entries();
      while(entries.hasMoreElements()) {
        ZipEntry entry = entries.nextElement();
        if(entry.isDirectory()) {
          continue;
        }
        URL url = new URL("jar:" + fileURI + "!/" + entry.getName());
        Document doc = gate.Factory.newDocument(url, "UTF-8");
        index.indexDocument(doc);
      }
    } finally {
      // always release the file handle, even if indexing failed
      zip.close();
    }
    index.close();
  }
}
/**
 * Deletes the index directories created by the one-time set-up, unless the
 * <code>keepTestIndex</code> system property is set to <code>true</code>.
 *
 * Directories that cannot be deleted are reported on <code>System.err</code>.
 */
@AfterClass
public static void oneTimeTearDown() {
  boolean keepIndex = Boolean.parseBoolean(
      System.getProperty("keepTestIndex", "false"));
  // indexDirs is still null if the set-up failed before allocating it
  if(keepIndex || indexDirs == null) {
    return;
  }
  for(File indexDir : indexDirs) {
    // entries stay null if the set-up failed part-way through
    if(indexDir == null) {
      continue;
    }
    // recursively delete index dir
    if(!TestUtils.deleteDir(indexDir)) {
      System.err.println("Could not delete index directory " + indexDir);
    }
  }
}
/**
 * Executes two sequence queries, one with gaps and one without and checks
 * that the results of one are included in the other one.
 *
 * The query engine is closed even when an assertion fails.
 *
 * @throws IOException
 * @throws IndexException
 */
@Test
public void testSequenceQueryGaps() throws IOException, IndexException {
  for(File indexDir : indexDirs) {
    QueryEngine engine = new MimirIndex(indexDir).getQueryEngine();
    try {
      TermQuery tq1 = new TermQuery("string", "up");
      TermQuery tq2 = new TermQuery("string", "to");
      TermQuery tq3 = new TermQuery("string", "the");
      SequenceQuery sQuery = new SequenceQuery(null, tq1, tq2, tq3);
      SequenceQuery sQueryGaps =
          new SequenceQuery(new SequenceQuery.Gap[]{SequenceQuery
              .getGap(1, 1)}, tq1, tq3);
      List<Binding>[] diff = TestUtils.calculateDiff(sQuery, sQueryGaps, engine);
      // second query is more permissive than first
      assertNotNull("The two queries returned the same result set!", diff);
      assertTrue("The non gaps query has results not included in the gaps one!",
          diff[0].isEmpty());
      assertTrue("The gaps query returned no additional hits!",
          diff[1].size() > 0);
    } finally {
      engine.close();
    }
  }
}
/**
 * Executes two equivalent queries using {@link RepeatsQuery} and
 * {@link OrQuery} and compares the results.
 *
 * Any difference is printed to <code>System.err</code> before the assertion
 * fails. The query engine is closed even when the assertion fails.
 *
 * @throws IndexException
 * @throws IOException
 */
@Test
public void testRepeatsAndOrQueries() throws IndexException, IOException {
  for(File indexDir : indexDirs) {
    QueryEngine engine = new MimirIndex(indexDir).getQueryEngine();
    try {
      Map<String, String> empty = Collections.emptyMap();
      AnnotationQuery annQuery = new AnnotationQuery("Measurement", empty);
      RepeatsQuery rQuery = new RepeatsQuery(annQuery, 1, 3);
      OrQuery orQuery =
          new OrQuery(annQuery, new SequenceQuery(null, annQuery, annQuery),
              new SequenceQuery(null, annQuery, annQuery, annQuery));
      List<Binding>[] diff = TestUtils.calculateDiff(rQuery, orQuery, engine);
      if(diff != null) {
        System.err.println(TestUtils.printDiffResults(diff, engine));
      }
      assertNull("Repeats query result different from equivalent OR query. "
          + "See system.err for details!", diff);
    } finally {
      engine.close();
    }
  }
}
/**
 * Executes two equivalent queries using {@link SequenceQuery} and
 * {@link RepeatsQuery} and compares the results.
 *
 * Any difference is printed to <code>System.err</code> before the assertion
 * fails. The query engine is closed even when the assertion fails.
 *
 * @throws IndexException
 * @throws IOException
 */
@Test
public void testSequenceAndRepeatsQueries() throws IndexException,
    IOException {
  for(File indexDir : indexDirs) {
    QueryEngine engine = new MimirIndex(indexDir).getQueryEngine();
    try {
      Map<String, String> empty = Collections.emptyMap();
      AnnotationQuery annQuery = new AnnotationQuery("Measurement", empty);
      SequenceQuery sQuery =
          new SequenceQuery(null, annQuery, annQuery, annQuery);
      RepeatsQuery rQuery = new RepeatsQuery(annQuery, 3, 3);
      List<Binding>[] diff = TestUtils.calculateDiff(sQuery, rQuery, engine);
      if(diff != null) {
        System.err.println(TestUtils.printDiffResults(diff, engine));
      }
      // the message names the two query types actually compared here
      assertNull("Sequence query result different from equivalent Repeats "
          + "query. See system.err for details!", diff);
    } finally {
      engine.close();
    }
  }
}
/**
 * Executes three equivalent queries using different gap implementations
 * coming from {@link SequenceQuery}, {@link TermQuery} and {@link GapQuery}
 * and compares the results.
 *
 * The query engine is closed even when the assertion fails.
 *
 * @throws IndexException
 * @throws IOException
 */
@Test
public void testGapImplementations() throws IndexException, IOException {
  for(File indexDir : indexDirs) {
    QueryEngine engine = new MimirIndex(indexDir).getQueryEngine();
    try {
      TermQuery tq1 = new TermQuery("string", "up");
      TermQuery tq3 = new TermQuery("root", "the");
      // variant 1: the gap is declared on the sequence itself
      SequenceQuery sQuery1 =
          new SequenceQuery(new SequenceQuery.Gap[]{SequenceQuery
              .getGap(1, 1)}, tq1, tq3);
      // variant 2: the gap is folded into the first term query
      TermQuery tq1Gap = new TermQuery(QueryEngine.IndexType.TOKENS, "string", "up", 2);
      SequenceQuery sQuery2 = new SequenceQuery(null, tq1Gap, tq3);
      // variant 3: the first term is wrapped in an explicit gap query
      GapQuery gQ1 = new GapQuery(tq1, 1);
      SequenceQuery sQuery3 = new SequenceQuery(null, gQ1, tq3);
      assertTrue("Not all results are the same!", TestUtils.allEqual(engine,
          sQuery1, sQuery2, sQuery3));
    } finally {
      engine.close();
    }
  }
}
/**
 * Tests the functionality of the result set diff algorithm in
 * {@link TestUtils}: comparing a query against itself must report no
 * difference. Only the first index is used.
 *
 * The query engine is closed even when the assertion fails.
 *
 * @throws IOException
 * @throws IndexException
 */
@Test
public void testDiffer() throws IOException, IndexException {
  File indexDir = indexDirs[0];
  QueryEngine engine = new MimirIndex(indexDir).getQueryEngine();
  try {
    String[] terms = new String[]{"up", "to", "the"};
    TermQuery[] tqs = new TermQuery[terms.length];
    for(int i = 0; i < terms.length; i++) {
      tqs[i] = new TermQuery("string", terms[i]);
    }
    SequenceQuery seqQuery = new SequenceQuery(null, tqs);
    List<Binding>[] res = TestUtils.calculateDiff(seqQuery, seqQuery, engine);
    assertNull("Different results from the same query!", res);
  } finally {
    engine.close();
  }
}
/**
 * Runs a Measurement annotation query constrained by a <code>spec</code>
 * value against every index, writing each result set to its own file, and,
 * when more than one helper is being tested, checks that all result files
 * are identical.
 *
 * The query engine is closed even when the query fails.
 *
 * @throws IndexException
 * @throws IOException
 */
@Test
public void annotationQuery() throws IndexException, IOException {
  String[] qResNames = new String[indexDirs.length];
  for(int i = 0; i < indexDirs.length; i++) {
    QueryEngine engine = new MimirIndex(indexDirs[i]).getQueryEngine();
    try {
      Map<String, String> constraints = new HashMap<String, String>();
      constraints.put("spec", "1 to 32 degF");
      AnnotationQuery annQuery = new AnnotationQuery("Measurement", constraints);
      qResNames[i] = "annotation-" + i;
      performQuery(qResNames[i], annQuery, engine);
    } finally {
      engine.close();
    }
  }
  if(qResNames.length > 1) {
    assertTrue("Got different results from different helpers", identical(qResNames));
  }
}
/**
 * Runs a gap-less sequence query over the token strings "up", "to", "the"
 * against every index, writing each result set to its own file, and, when
 * more than one helper is being tested, checks that all result files are
 * identical.
 *
 * The query engine is closed even when the query fails.
 *
 * @throws IndexException
 * @throws IOException
 */
@Test
public void testStringSequenceQuery() throws IndexException, IOException {
  String[] qResNames = new String[indexDirs.length];
  for(int i = 0; i < indexDirs.length; i++) {
    QueryEngine engine = new MimirIndex(indexDirs[i]).getQueryEngine();
    try {
      String[] terms = new String[] {"up", "to", "the"};
      TermQuery[] termQueries = new TermQuery[terms.length];
      for(int j = 0; j < terms.length; j++) {
        termQueries[j] = new TermQuery("string", terms[j]);
      }
      // no gaps are used between the terms
      SequenceQuery sequenceQuery = new SequenceQuery(null, termQueries);
      qResNames[i] = "termSequence-" + i;
      performQuery(qResNames[i], sequenceQuery, engine);
    } finally {
      engine.close();
    }
  }
  if(qResNames.length > 1) {
    assertTrue("Got different results from different helpers", identical(qResNames));
  }
}
/**
 * Runs a gap-less sequence query for three consecutive "NN" values of the
 * <code>category</code> feature against every index, writing each result set
 * to its own file, and, when more than one helper is being tested, checks
 * that all result files are identical.
 *
 * The query engine is closed even when the query fails.
 *
 * @throws IndexException
 * @throws IOException
 */
@Test
public void testCategorySequenceQuery() throws IndexException, IOException {
  String[] qResNames = new String[indexDirs.length];
  for(int i = 0; i < indexDirs.length; i++) {
    QueryEngine engine = new MimirIndex(indexDirs[i]).getQueryEngine();
    try {
      String[] terms = new String[]{"NN", "NN", "NN"};
      TermQuery[] termQueries = new TermQuery[terms.length];
      for(int j = 0; j < terms.length; j++) {
        termQueries[j] = new TermQuery("category", terms[j]);
      }
      // no gaps are used between the terms
      SequenceQuery sequenceQuery = new SequenceQuery(null, termQueries);
      qResNames[i] = "categorySequence-" + i;
      performQuery(qResNames[i], sequenceQuery, engine);
    } finally {
      engine.close();
    }
  }
  if(qResNames.length > 1) {
    assertTrue("Got different results from different helpers", identical(qResNames));
  }
}
/**
 * Runs a gap-less sequence of three unconstrained Measurement annotation
 * queries against every index, writing each result set to its own file, and,
 * when more than one helper is being tested, checks that all result files
 * are identical.
 *
 * The query engine is closed even when the query fails.
 *
 * @throws IndexException
 * @throws IOException
 */
@Test
public void testAnnotationSequenceQuery() throws IndexException, IOException {
  String[] qResNames = new String[indexDirs.length];
  for(int i = 0; i < indexDirs.length; i++) {
    QueryEngine engine = new MimirIndex(indexDirs[i]).getQueryEngine();
    try {
      Map<String, String> empty = Collections.emptyMap();
      AnnotationQuery annQuery = new AnnotationQuery("Measurement", empty);
      SequenceQuery sequenceQuery = new SequenceQuery(null/* gaps */, annQuery, annQuery, annQuery);
      qResNames[i] = "annotationSequence-" + i;
      performQuery(qResNames[i], sequenceQuery, engine);
    } finally {
      engine.close();
    }
  }
  if(qResNames.length > 1) {
    assertTrue("Got different results from different helpers", identical(qResNames));
  }
}
/**
 * Runs a {@link RepeatsQuery} (bounds 3 and 3) over an unconstrained
 * Measurement annotation query against every index, writing each result set
 * to its own file, and, when more than one helper is being tested, checks
 * that all result files are identical.
 *
 * The query engine is closed even when the query fails.
 *
 * @throws IndexException
 * @throws IOException
 */
@Test
public void testRepeatsQuery() throws IndexException, IOException {
  String[] qResNames = new String[indexDirs.length];
  for(int i = 0; i < indexDirs.length; i++) {
    QueryEngine engine = new MimirIndex(indexDirs[i]).getQueryEngine();
    try {
      Map<String, String> empty = Collections.emptyMap();
      AnnotationQuery annQuery = new AnnotationQuery("Measurement", empty);
      RepeatsQuery repeatsQuery = new RepeatsQuery(annQuery, 3, 3);
      qResNames[i] = "repeats-" + i;
      performQuery(qResNames[i], repeatsQuery, engine);
    } finally {
      engine.close();
    }
  }
  if(qResNames.length > 1) {
    assertTrue("Got different results from different helpers", identical(qResNames));
  }
}
/**
 * Runs a {@link WithinQuery} combining the token string "to" with
 * Measurement annotations of type "interval" against every index, writing
 * each result set to its own file, and, when more than one helper is being
 * tested, checks that all result files are identical.
 *
 * The query engine is closed even when the query fails.
 *
 * @throws IndexException
 * @throws IOException
 */
@Test
public void testWithinQuery() throws IndexException, IOException {
  String[] qResNames = new String[indexDirs.length];
  for(int i = 0; i < indexDirs.length; i++) {
    QueryEngine engine = new MimirIndex(indexDirs[i]).getQueryEngine();
    try {
      AnnotationQuery intervalQuery = new AnnotationQuery("Measurement", Collections.singletonMap("type", "interval"));
      TermQuery toQuery = new TermQuery("string", "to");
      WithinQuery withinQuery = new WithinQuery(toQuery, intervalQuery);
      qResNames[i] = "within-" + i;
      performQuery(qResNames[i], withinQuery, engine);
    } finally {
      engine.close();
    }
  }
  if(qResNames.length > 1) {
    assertTrue("Got different results from different helpers", identical(qResNames));
  }
}
/**
 * Runs a {@link WithinQuery} whose first operand is an {@link AndQuery} of
 * the terms "London" and "press" and whose second operand is an
 * unconstrained Reference annotation query, against every index. Each result
 * set is written to its own file and, when more than one helper is being
 * tested, all result files are checked to be identical.
 *
 * The query engine is closed even when the query fails.
 *
 * @throws IndexException
 * @throws IOException
 */
@Test
public void testInAndQuery() throws IndexException, IOException {
  String[] qResNames = new String[indexDirs.length];
  for(int i = 0; i < indexDirs.length; i++) {
    QueryEngine engine = new MimirIndex(indexDirs[i]).getQueryEngine();
    try {
      QueryNode inAndQuery = new WithinQuery(new AndQuery(new TermQuery(null, "London"),
          new TermQuery(null, "press")), new AnnotationQuery(
          "Reference", new HashMap<String, String>()));
      qResNames[i] = "inAnd-" + i;
      performQuery(qResNames[i], inAndQuery, engine);
    } finally {
      engine.close();
    }
  }
  if(qResNames.length > 1) {
    assertTrue("Got different results from different helpers", identical(qResNames));
  }
}
/**
 * Runs a Measurement annotation query with the <code>spec</code> constraint
 * "5 cm" against every index, writing each result set to its own file, and,
 * when more than one helper is being tested, checks that all result files
 * are identical.
 *
 * The query engine is closed even when the query fails.
 *
 * @throws IndexException
 * @throws IOException
 */
@Test
public void testMeasurementSpecQuery() throws IndexException, IOException {
  String[] qResNames = new String[indexDirs.length];
  for(int i = 0; i < indexDirs.length; i++) {
    QueryEngine engine = new MimirIndex(indexDirs[i]).getQueryEngine();
    try {
      AnnotationQuery specQuery = new AnnotationQuery("Measurement", Collections.singletonMap("spec", "5 cm"));
      qResNames[i] = "measurementSpec-" + i;
      performQuery(qResNames[i], specQuery, engine);
    } finally {
      engine.close();
    }
  }
  if(qResNames.length > 1) {
    assertTrue("Got different results from different helpers", identical(qResNames));
  }
}
/**
 * Asks the query engine of every index to render document 0 with two
 * hand-made hits into <code>renderDocumentResult.txt</code> in the results
 * directory, and fails if rendering throws.
 *
 * The results directory is created if missing, so the test does not depend
 * on another test having run first. The output writer and the query engine
 * are always closed.
 *
 * @throws IndexException
 * @throws IOException
 */
@Test
public void testQueryEngineRenderDocument() throws IndexException, IOException {
  File resultsDirectory = new File(resultsPath);
  if(!resultsDirectory.exists()) resultsDirectory.mkdirs();
  for(File indexDir : indexDirs) {
    QueryEngine engine = new MimirIndex(indexDir).getQueryEngine();
    try {
      List<Binding> hits = new ArrayList<Binding>();
      hits.add(new Binding(null, 0, 100, 5, null));
      hits.add(new Binding(null, 0, 110, 4, null));
      FileWriter writer = null;
      try {
        writer = new FileWriter(resultsPath + "/renderDocumentResult.txt");
        engine.renderDocument(0, hits, writer);
      } catch(Exception e) {
        // report as a test failure, but keep the original exception as the
        // cause so that its stack trace is not lost
        AssertionError failure =
            new AssertionError("Could not render document 0: " + e);
        failure.initCause(e);
        throw failure;
      } finally {
        if(writer != null) writer.close();
      }
    } finally {
      engine.close();
    }
  }
}
/**
 * Test the semantic annotation helpers used for indexing document features.
 *
 * For every index, runs <code>{Document}</code> and
 * <code>{Document date > 20070000}</code> and checks that the
 * feature-constrained query returns strictly fewer hits. When more than one
 * helper is being tested, the result files of each query are also checked to
 * be identical across helpers.
 *
 * The query engine is closed even when an assertion fails.
 *
 * @throws IndexException
 * @throws ParseException
 * @throws IOException
 */
@Test
public void testDocumentMode() throws IndexException, ParseException, IOException {
  String[] qResNames = new String[indexDirs.length];
  String[] qResNames2 = new String[indexDirs.length];
  for(int i = 0; i < indexDirs.length; i++) {
    QueryEngine engine = new MimirIndex(indexDirs[i]).getQueryEngine();
    try {
      QueryNode qNode = QueryParser.parse("{Document}");
      qResNames[i] = "doc-" + i;
      int hits = performQuery(qResNames[i], qNode, engine);
      qNode = QueryParser.parse("{Document date > 20070000}");
      qResNames2[i] = "docFeats-" + i;
      int hits2 = performQuery(qResNames2[i], qNode, engine);
      assertTrue("Feature filtering does not reduce the result set!",
          hits2 < hits);
    } finally {
      engine.close();
    }
  }
  if(qResNames.length > 1) {
    assertTrue("Got different results from different helpers", identical(qResNames));
    assertTrue("Got different results from different helpers", identical(qResNames2));
  }
}
/**
 * Executes a query and writes every hit, with its surrounding context, to
 * the file <code>&lt;name&gt;QueryResult.xml</code> in the results
 * directory (which is created if missing).
 *
 * Any exception raised while executing the query, writing the file or
 * closing the resources is turned into a test failure. The writer and the
 * executor are closed independently, so a failure closing one does not
 * prevent the other from being closed.
 *
 * @param name the name of the query, used as the prefix of the result file.
 * @param query the query to execute.
 * @param engine the query engine to execute the query against.
 * @return the number of hits produced by the query.
 */
private int performQuery(String name, QueryNode query, QueryEngine engine) {
  QueryExecutor executor = null;
  int hitCount = 0;
  BufferedWriter writer = null;
  try {
    File resultsDirectory = new File(resultsPath);
    if (!resultsDirectory.exists()) resultsDirectory.mkdirs();
    executor = query.getQueryExecutor(engine);
    writer = new BufferedWriter(new FileWriter(resultsPath + "/" + name + "QueryResult.xml"));
    writer.write("<query query=\"" + query.toString() + "\">");
    writer.newLine();
    writer.write("\t<hits>");
    writer.newLine();
    // walk through the documents until the executor reports -1
    while (executor.nextDocument(-1) != -1) {
      Binding hit = executor.nextHit();
      while(hit != null) {
        hitCount++;
        writer.write("\t\t<hit number=\"" + hitCount + "\">");
        writer.write(getHitString(hit, engine));
        writer.write("</hit>\n");
        hit = executor.nextHit();
      }
    }
    writer.write("\t</hits>");
    writer.newLine();
    writer.write("</query>");
    writer.newLine();
    writer.flush();
  } catch(Exception e) {
    e.printStackTrace();
    fail(e.getMessage());
  } finally {
    // remember the first close failure, but still attempt both closes
    Exception closeFailure = null;
    if(writer != null) {
      try {
        writer.close();
      } catch(Exception e) {
        closeFailure = e;
      }
    }
    if(executor != null) {
      try {
        executor.close();
      } catch(Exception e) {
        if(closeFailure == null) closeFailure = e;
      }
    }
    if(closeFailure != null) {
      fail(closeFailure.getMessage());
    }
  }
  return hitCount;
}
/**
 * Compares the files resulting from the execution of two or more queries.
 * The queries must have been performed previously, by calling
 * {@link #performQuery(String, QueryNode, QueryEngine)}.
 *
 * The files are compared line by line against the first one; the first
 * difference found is logged as a warning. All files are closed before
 * returning, whatever the outcome.
 *
 * @param queryNames the names of the queries to compare.
 * @return <code>true</code> if all result files have exactly the same lines
 *         (or if no names were given), <code>false</code> otherwise.
 * @throws IOException if a result file cannot be opened or read.
 */
private static boolean identical(String... queryNames) throws IOException {
  if(queryNames.length == 0) {
    // nothing to compare
    return true;
  }
  BufferedReader[] readers = new BufferedReader[queryNames.length];
  try {
    for(int i = 0; i < queryNames.length; i++) {
      readers[i] = new BufferedReader(new FileReader(resultsPath + "/" +
          queryNames[i] + "QueryResult.xml"));
    }
    String line = null;
    do {
      line = readers[0].readLine();
      for(int i = 1; i < readers.length; i++) {
        String anotherLine = readers[i].readLine();
        // a null line means the end of that file was reached
        boolean same = (line == null) ? (anotherLine == null)
            : line.equals(anotherLine);
        if(!same) {
          logger.warning("Assertion error: result sets not identical. " +
              "First difference:\n Line (0):" + line +
              "\nLine (" + i + "):" + anotherLine);
          return false;
        }
      }
    } while(line != null);
    return true;
  } finally {
    for(BufferedReader reader : readers) {
      // entries are null if opening one of the files failed
      if(reader != null) {
        try {
          reader.close();
        } catch(IOException e) {
          // the comparison outcome is already decided; just report it
          logger.warning("Could not close result file reader: " + e);
        }
      }
    }
  }
}
/**
 * Builds a single-line textual rendering of a hit: the left context, the hit
 * text and the right context (both contexts requested with a size of 2),
 * concatenated in that order, with every platform line separator replaced by
 * a space.
 *
 * @param hit the hit to render.
 * @param searcher the query engine used to obtain the hit text and context.
 * @return the rendered hit.
 * @throws IndexException declared because the engine lookups used here
 *           may throw it.
 */
private String getHitString(Binding hit, QueryEngine searcher) throws IndexException
{
  StringBuilder buffer = new StringBuilder();
  // left context, then the hit itself, then right context
  appendHitText(hit, searcher.getLeftContext(hit, 2), buffer);
  appendHitText(hit, searcher.getHitText(hit), buffer);
  appendHitText(hit, searcher.getRightContext(hit, 2), buffer);
  String rendered = buffer.toString();
  return rendered.replace(NEW_LINE, " ");
}
/**
 * Appends a piece of text to a string builder. The text is given as two
 * parallel arrays: <code>text[0]</code> holds the tokens and
 * <code>text[1]</code> the spaces following them. A null token is written as
 * the empty string and a null space as a single space character. Only as
 * many pairs as the shorter of the two arrays are written.
 *
 * @param hit the hit the text belongs to (not used by this method).
 * @param text the tokens and spaces to append.
 * @param sb the builder to append to.
 */
private void appendHitText(Binding hit, String[][] text, StringBuilder sb)
{
  final String[] tokens = text[0];
  final String[] spaces = text[1];
  final int pairCount = Math.min(tokens.length, spaces.length);
  int idx = 0;
  while(idx < pairCount) {
    String token = tokens[idx];
    String space = spaces[idx];
    sb.append(token == null ? "" : token);
    sb.append(space == null ? " " : space);
    idx++;
  }
}
}