TestUtils.java example

Explorer

mimir-master
- etc
  - generated-header.java
- mimir-client
  - src
    - gate
      - mimir
        index
        MimirConnector.java
        MimirIndexingPR.java
        search
        RemoteQueryRunner.java
        tool
        WebUtils.java
- mimir-cloud
  - archive-unpacker
    - src
      - gate
        mimir
        util
        MultiFileInputStream.java
        UnpackWizard.java
  - src
    - java
      - gate
        mimir
        util
        IndexArchiveState.java
        MultiFileOutputStream.java
- mimir-core
  - src
    - gate
      - mimir
        AbstractSemanticAnnotationHelper.java
        Constraint.java
        ConstraintType.java
        DocumentMetadataHelper.java
        DocumentRenderer.java
        IndexConfig.java
        MimirIndex.java
        SemanticAnnotationHelper.java
        index
        AtomicAnnotationIndex.java
        AtomicIndex.java
        AtomicTokenIndex.java
        DocumentCollection.java
        DocumentData.java
        GATEDocument.java
        GATEDocumentFactory.java
        IndexException.java
        Mention.java
        OriginalMarkupMetadataHelper.java
        package-info.java
        search
        FederatedQueryRunner.java
        IndexReaderPool.java
        QueryEngine.java
        QueryRunner.java
        RankingQueryRunnerImpl.java
        query
        AbstractIntersectionQueryExecutor.java
        AbstractOverlapQuery.java
        AbstractQueryExecutor.java
        AndQuery.java
        AnnotationQuery.java
        Binding.java
        ConstQuery.java
        ContainsQuery.java
        ExecutorsList.java
        GapQuery.java
        MinusQuery.java
        OrQuery.java
        QueryExecutor.java
        QueryNode.java
        RepeatsQuery.java
        SequenceQuery.java
        TermQuery.java
        WithinQuery.java
        parser
        ParseException.java
        Query.java
        QueryParser.java
        QueryParserConstants.java
        QueryParserTokenManager.java
        SimpleCharStream.java
        Token.java
        TokenMgrError.java
        score
        BindingScorer.java
        DelegatingScoringQueryExecutor.java
        MimirScorer.java
        terms
        AbstractCompoundTermsQuery.java
        AbstractDocumentsBasedTermsQuery.java
        AbstractIndexTermsQuery.java
        AndTermsQuery.java
        AnnotationTermsQuery.java
        CompoundTermsQuery.java
        ConstTermsQuery.java
        DocumentTermsQuery.java
        DocumentsAndTermsQuery.java
        DocumentsBasedTermsQuery.java
        DocumentsOrTermsQuery.java
        LimitTermsQuery.java
        OrTermsQuery.java
        SortedTermsQuery.java
        TermTypeTermsQuery.java
        TermsQuery.java
        TermsResultSet.java
        util
        DefaultMentionDescriber.java
        DelegatingSemanticAnnotationHelper.java
        DocumentFeaturesMetadataHelper.java
        IgnoreEmptiesTermProcessor.java
        IndexUpgrader.java
        MG4JTools.java
        NormalizingTermProcessor.java
        OntologyMentionDescriber.java
        TruncateIndex.java
- mimir-test
  - src
    - gate
      - mimir
        test
        QueryTests.java
        RenderZipCollection.java
        Scratch.java
        ScratchConsole.java
        TestQueryParser.java
        TestUtils.java
- mimir-web
  - src
    - gwt
      - gate
        mimir
        web
        client
        UI.java
    - java
      - gate
        mimir
        util
        LogAnalyser.java
        web
        client
        DocumentData.java
        GwtRpcService.java
        GwtRpcServiceAsync.java
        MimirSearchException.java
        ResultsData.java
- plugins
  - db-h2
    - src
      - gate
        mimir
        db
        AnnotationTemplateCache.java
        DBSemanticAnnotationHelper.java
  - measurements
    - src
      - gate
        mimir
        measurements
        MeasurementAnnotationHelper.java
        MeasurementPluginResource.java
  - sparql
    - src
      - gate
        mimir
        sparql
        RequestMethod.java
        SPARQLResultSet.java
        SPARQLSemanticAnnotationHelper.java

/*
 *  TestUtils.java
 *
 *  Copyright (c) 2007-2011, The University of Sheffield.
 *
 *  This file is part of GATE Mímir (see http://gate.ac.uk/family/mimir.html), 
 *  and is free software, licenced under the GNU Lesser General Public License,
 *  Version 3, June 2007 (also included with this distribution as file
 *  LICENCE-LGPL3.html).
 *
 *  $Id$
 */
package gate.mimir.test;

import gate.Gate;
import gate.creole.ANNIEConstants;
import gate.mimir.index.*;
import gate.mimir.AbstractSemanticAnnotationHelper;
import gate.mimir.DocumentMetadataHelper;
import gate.mimir.IndexConfig;
import gate.mimir.SemanticAnnotationHelper;
import gate.mimir.IndexConfig.SemanticIndexerConfig;
import gate.mimir.IndexConfig.TokenIndexerConfig;
import gate.mimir.SemanticAnnotationHelper.Mode;
import gate.mimir.search.QueryEngine;
import gate.mimir.search.query.*;

import it.unimi.di.big.mg4j.index.DowncaseTermProcessor;
import it.unimi.di.big.mg4j.index.NullTermProcessor;

import java.io.*;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.util.*;



/**
 * A collection of utility methods in support of tests. 
 */
public class TestUtils {
  
  /**
   * Builds the {@link IndexConfig} used for testing.
   * <p>
   * The configuration contains three token indexers (token string, token
   * category and "root"), three semantic indexer groups (measurements,
   * publication-related annotations and patent-related annotations) and an
   * {@link OriginalMarkupMetadataHelper} for a fixed set of HTML tags, which
   * is passed both as the only document metadata helper and as the last
   * constructor argument.
   *
   * @param indexDir the index directory passed to the {@link IndexConfig}
   * constructor.
   * @param helperClass the helper implementation instantiated for every
   * annotation type, and handed to the measurements helper through its
   * <code>setDelegateHelperType</code> method.
   * @return the newly created index configuration.
   * @throws ClassNotFoundException if the measurements helper class cannot be
   * loaded through the GATE class loader.
   * @throws InstantiationException if a helper cannot be instantiated.
   * @throws IllegalAccessException if a helper constructor, or the
   * reflectively invoked setter, is not accessible.
   * @throws NoSuchMethodException if the measurements helper has no
   * <code>setDelegateHelperType(Class)</code> method.
   * @throws InvocationTargetException if the reflectively invoked setter
   * throws an exception.
   */
  public static IndexConfig getTestIndexConfig(File indexDir,
      Class<? extends AbstractSemanticAnnotationHelper> helperClass)
      throws IllegalArgumentException, InstantiationException,
      IllegalAccessException, InvocationTargetException, SecurityException,
      NoSuchMethodException, ClassNotFoundException {

    // The measurements helper is looked up by name through the GATE class
    // loader (rather than referenced at compile time) and configured
    // reflectively.
    Class<? extends SemanticAnnotationHelper> measurementsType =
        Class.forName("gate.mimir.measurements.MeasurementAnnotationHelper",
            true, Gate.getClassLoader())
            .asSubclass(SemanticAnnotationHelper.class);
    SemanticAnnotationHelper measurementHelper = measurementsType.newInstance();
    measurementsType.getMethod("setDelegateHelperType", Class.class)
        .invoke(measurementHelper, helperClass);

    // simple metadata helper for HTML tags
    HashSet<String> htmlTags = new HashSet<String>(Arrays.asList(
        "b", "i", "li", "ol", "p", "sup", "sub", "u", "ul"));
    OriginalMarkupMetadataHelper docHelper =
        new OriginalMarkupMetadataHelper(htmlTags);

    // token indexers: string (down-cased), category and root
    TokenIndexerConfig[] tokenIndexers = new TokenIndexerConfig[] {
      new TokenIndexerConfig(ANNIEConstants.TOKEN_STRING_FEATURE_NAME,
          DowncaseTermProcessor.getInstance(), true),
      new TokenIndexerConfig(ANNIEConstants.TOKEN_CATEGORY_FEATURE_NAME,
          NullTermProcessor.getInstance(), true),
      new TokenIndexerConfig("root", NullTermProcessor.getInstance(), true)
    };

    // semantic indexer for measurements
    SemanticIndexerConfig measurementsIndexer = new SemanticIndexerConfig(
        new String[] {"Measurement"},
        new SemanticAnnotationHelper[] {measurementHelper}, true);

    // semantic indexer for publication-related annotations
    SemanticAnnotationHelper[] publicationHelpers =
        new SemanticAnnotationHelper[] {
          createHelper(helperClass, "PublicationAuthor", null, null, null,
              null, null, Mode.ANNOTATION),
          createHelper(helperClass, "PublicationDate", null, null, null,
              null, null, Mode.ANNOTATION),
          createHelper(helperClass, "PublicationLocation", null, null, null,
              null, null, Mode.ANNOTATION),
          createHelper(helperClass, "PublicationPages", null, null, null,
              null, null, Mode.ANNOTATION),
          createHelper(helperClass, "Reference", new String[] {"type"}, null,
              null, null, null, Mode.ANNOTATION),
          createHelper(helperClass, "Section", new String[] {"type"}, null,
              null, null, null, Mode.ANNOTATION),
          createHelper(helperClass, "Sentence", null, null, null,
              null, null, Mode.ANNOTATION)
        };
    SemanticIndexerConfig publicationsIndexer = new SemanticIndexerConfig(
        new String[] {"PublicationAuthor", "PublicationDate",
          "PublicationLocation", "PublicationPages",
          "Reference", "Section", "Sentence"},
        publicationHelpers, true);

    // semantic indexer for patent-related annotations
    SemanticAnnotationHelper[] patentHelpers =
        new SemanticAnnotationHelper[] {
          createHelper(helperClass, "Abstract", new String[] {"lang"}, null,
              null, null, null, Mode.ANNOTATION),
          createHelper(helperClass, "Assignee", null, null, null,
              null, null, Mode.ANNOTATION),
          createHelper(helperClass, "ClassificationIPCR",
              new String[] {"status"}, null, null, null, null,
              Mode.ANNOTATION),
          createHelper(helperClass, "InventionTitle",
              new String[] {"lang", "status"}, null, null, null, null,
              Mode.ANNOTATION),
          createHelper(helperClass, "Inventor",
              new String[] {"format", "status"}, null, null, null, null,
              Mode.ANNOTATION),
          createHelper(helperClass, "Document", null, new String[] {"date"},
              null, new String[] {"ucid"}, null, Mode.DOCUMENT),
          createHelper(helperClass, "PriorityClaim", null, null, null,
              new String[] {"ucid"}, null, Mode.ANNOTATION)
        };
    SemanticIndexerConfig patentsIndexer = new SemanticIndexerConfig(
        new String[] {"Abstract", "Assignee",
          "ClassificationIPCR", "InventionTitle",
          "Inventor", "Document", "PriorityClaim"},
        patentHelpers, true);

    // index configuration used for testing.
    return new IndexConfig(
        indexDir,
        "mimir",
        ANNIEConstants.TOKEN_ANNOTATION_TYPE,
        "mimir",
        tokenIndexers,
        new SemanticIndexerConfig[] {
          measurementsIndexer, publicationsIndexer, patentsIndexer},
        new DocumentMetadataHelper[] {docHelper},
        docHelper);
  }


  /**
   * Instantiates a semantic annotation helper of the requested class and
   * configures it through its setters.
   *
   * @param helperClass the helper implementation to instantiate (through
   * {@link Class#newInstance()}).
   * @param annType the value passed to <code>setAnnotationType</code>.
   * @param nominalFeatures the value passed to
   * <code>setNominalFeatures</code>; may be <code>null</code>.
   * @param integerFeatures the value passed to
   * <code>setIntegerFeatures</code>; may be <code>null</code>.
   * @param floatFeatures the value passed to <code>setFloatFeatures</code>;
   * may be <code>null</code>.
   * @param textFeatures the value passed to <code>setTextFeatures</code>;
   * may be <code>null</code>.
   * @param uriFeatures the value passed to <code>setUriFeatures</code>; may
   * be <code>null</code>.
   * @param mode the value passed to <code>setMode</code>.
   * @return the configured helper.
   * @throws InstantiationException if the helper class cannot be
   * instantiated.
   * @throws IllegalAccessException if the helper class or its no-argument
   * constructor is not accessible.
   */
  public static SemanticAnnotationHelper createHelper(
      Class<? extends AbstractSemanticAnnotationHelper> helperClass,
      String annType, String[] nominalFeatures, String[] integerFeatures,
      String[] floatFeatures, String[] textFeatures, String[] uriFeatures,
      SemanticAnnotationHelper.Mode mode)
      throws InstantiationException, IllegalAccessException {
    AbstractSemanticAnnotationHelper configured = helperClass.newInstance();
    configured.setAnnotationType(annType);
    configured.setNominalFeatures(nominalFeatures);
    configured.setIntegerFeatures(integerFeatures);
    configured.setFloatFeatures(floatFeatures);
    configured.setTextFeatures(textFeatures);
    configured.setUriFeatures(uriFeatures);
    configured.setMode(mode);
    return configured;
  }
  

  
  /**
   * Executes two different queries and returns two lists of results: one with
   * hits that only appear in the first query, the other with hits that only 
   * appear in the second.
   * 
   * The hits from the two queries are only compared in terms of document id, 
   * term position, and hit length (i.e. the sub-bindings are ignored).
   * 
   * Both executors are closed before this method returns, including when an
   * exception is thrown while the results are being read.
   * 
   * @param left the first query to be executed.
   * @param right the second query to be executed.
   * @param engine the query engine used to obtain an executor for each query.
   * @return an array containing two lists. The first element is a list with 
   * hits that only occur in the <code>left</code> query; the second element is
   * a list of hits that only occur in the <code>right</code> query. If the diff
   * result is empty (the two queries gave rise to identical results) then 
   * <code>null</code> is returned instead. 
   * @throws IOException if the executors fail while producing results.
   */
  public static List<Binding>[] calculateDiff(QueryNode left, 
          QueryNode right, QueryEngine engine) throws IOException{
    List<Binding> onlyInLeft = new ArrayList<Binding>();
    List<Binding> onlyInRight = new ArrayList<Binding>();
    
    QueryExecutor leftExecutor = null;
    QueryExecutor rightExecutor = null;
    try{
      leftExecutor = left.getQueryExecutor(engine);
      rightExecutor = right.getQueryExecutor(engine);
      
      long leftDoc = leftExecutor.nextDocument(-1);
      long rightDoc = rightExecutor.nextDocument(-1);
      while(leftDoc != -1 || rightDoc != -1){
        //at least one doc is not -1
        if(rightDoc == -1 || (leftDoc != -1 && leftDoc < rightDoc)){
          //extra document in left -> add all hits from this document
          drainHits(leftExecutor, onlyInLeft);
          leftDoc = leftExecutor.nextDocument(-1);
        }else if(leftDoc == -1 || rightDoc < leftDoc){
          //extra document in right -> add all hits from this document
          drainHits(rightExecutor, onlyInRight);
          rightDoc = rightExecutor.nextDocument(-1);
        }else{
          //both left and right on the same document -> compare the hits
          //first collect and sort all hits on this document, for each executor
          List<Binding> leftHits = new ArrayList<Binding>();
          drainHits(leftExecutor, leftHits);
          Collections.sort(leftHits);
          List<Binding> rightHits = new ArrayList<Binding>();
          drainHits(rightExecutor, rightHits);
          Collections.sort(rightHits);
          diffSortedHits(leftHits, rightHits, onlyInLeft, onlyInRight);
          //advance both left and right to next docs
          leftDoc = leftExecutor.nextDocument(-1);
          rightDoc = rightExecutor.nextDocument(-1);
        }
      }//while(leftDoc != -1 || rightDoc != -1)
    }finally{
      //always release both executors, even if reading the results failed
      try{
        if(leftExecutor != null) leftExecutor.close();
      }finally{
        if(rightExecutor != null) rightExecutor.close();
      }
    }
    if(onlyInLeft.isEmpty() && onlyInRight.isEmpty()) return null;
    //generic array creation is not possible in Java; the array only ever
    //holds the two List<Binding> instances created above.
    @SuppressWarnings("unchecked")
    List<Binding>[] diff = new List[]{onlyInLeft, onlyInRight};
    return diff;
  }
  
  /**
   * Appends all the remaining hits on the executor's current document to the 
   * given list.
   */
  private static void drainHits(QueryExecutor executor, List<Binding> target) 
          throws IOException{
    Binding aHit = executor.nextHit();
    while(aHit != null){
      target.add(aHit);
      aHit = executor.nextHit();
    }
  }
  
  /**
   * Merges two sorted lists of hits from the same document, collecting the 
   * hits that appear on one side only. 
   * NOTE(review): this assumes that the natural ordering of {@link Binding}
   * (used by the caller to sort the lists) is by term position, then length,
   * within a document - confirm against Binding.compareTo.
   */
  private static void diffSortedHits(List<Binding> leftHits, 
          List<Binding> rightHits, List<Binding> onlyInLeft, 
          List<Binding> onlyInRight){
    Iterator<Binding> leftIter = leftHits.iterator();
    Iterator<Binding> rightIter = rightHits.iterator();
    Binding leftHit = leftIter.hasNext() ? leftIter.next() : null;
    Binding rightHit = rightIter.hasNext() ? rightIter.next() : null;
    while(leftHit != null || rightHit != null){
      //at least one of the hits is non-null!
      int order = compareHitSpans(leftHit, rightHit);
      if(order < 0){
        //extra hit in left; each side must advance from its OWN iterator
        onlyInLeft.add(leftHit);
        leftHit = leftIter.hasNext() ? leftIter.next() : null;
      }else if(order > 0){
        //extra hit in right
        onlyInRight.add(rightHit);
        rightHit = rightIter.hasNext() ? rightIter.next() : null;
      }else{
        //same hits -> advance both
        leftHit = leftIter.hasNext() ? leftIter.next() : null;
        rightHit = rightIter.hasNext() ? rightIter.next() : null;
      }
    }
  }
  
  /**
   * Compares two hits from the same document by term position, then length.
   * A <code>null</code> hit stands for an exhausted side, and sorts after any
   * actual hit. At most one of the arguments may be <code>null</code>.
   * @return a negative value if the left hit comes first, a positive value if
   * the right hit comes first, and zero if the two spans are the same.
   */
  private static int compareHitSpans(Binding leftHit, Binding rightHit){
    if(rightHit == null) return -1;
    if(leftHit == null) return 1;
    if(leftHit.getTermPosition() < rightHit.getTermPosition()) return -1;
    if(rightHit.getTermPosition() < leftHit.getTermPosition()) return 1;
    //same term position -> compare length
    if(leftHit.getLength() < rightHit.getLength()) return -1;
    if(leftHit.getLength() > rightHit.getLength()) return 1;
    return 0;
  }
  
  
  /**
   * Compares the results from a set of queries. An executor is obtained for
   * each query node and passed to 
   * {@link #dumpResultsToFile(QueryExecutor, File)}, which writes its hits to
   * a temporary file; the files are then compared line by line against the 
   * file of the first query.
   * 
   * The temporary files are deleted, and the readers used to compare them are
   * closed, before this method returns, whether it completes normally or with
   * an exception.
   * 
   * @param engine the query engine used to obtain the executors.
   * @param nodes the queries to compare.
   * @return <code>true</code> if all the queries produced the same result 
   * lines (trivially so when fewer than two queries are given), 
   * <code>false</code> otherwise.
   * @throws IOException if the results cannot be obtained, written or read 
   * back. 
   */
  public static boolean allEqual(QueryEngine engine, QueryNode... nodes) 
      throws IOException{
    File[] files = new File[nodes.length];
    BufferedReader[] readers = new BufferedReader[nodes.length];
    try{
      for(int i = 0; i < nodes.length; i++){
        //create the file first, so that a failure here cannot leave an open 
        //executor behind
        files[i] = File.createTempFile("query-" + i, ".txt");
        dumpResultsToFile(nodes[i].getQueryExecutor(engine), files[i]);
        readers[i] = new BufferedReader(new FileReader(files[i]));
      }
      if(readers.length == 0) return true;
      //now compare the results, one line at a time, against the first file
      while(true){
        String reference = readers[0].readLine();
        for(int i = 1; i < readers.length; i++){
          String line = readers[i].readLine();
          boolean same = (reference == null) ? 
                  line == null : reference.equals(line);
          if(!same) return false;
        }
        //all files ended at the same time, without any difference
        if(reference == null) return true;
      }
    }finally{
      //close all resources and delete all the files; entries may still be 
      //null if we failed part-way through the set-up
      IOException closeFailure = null;
      for(int i = 0; i < files.length; i++){
        if(readers[i] != null){
          try{
            readers[i].close();
          }catch(IOException e){
            //keep cleaning up; report the first failure at the end
            if(closeFailure == null) closeFailure = e;
          }
        }
        if(files[i] != null) files[i].delete();
      }
      if(closeFailure != null) throw closeFailure;
    }
  }
  
  /**
   * Obtains all the hits from the given {@link QueryExecutor}, represents 
   * each of them as a string containing document ID, term position and 
   * length, and saves the strings to a file, one on each line, after a header
   * line. The hit strings are sorted (as strings) within each document; 
   * documents are written in the order in which the executor returns them.
   * 
   * The executor and the file are both closed before this method returns, 
   * including when an exception is thrown.
   * 
   * @param executor the executor to drain; it is closed by this method.
   * @param file the file to write the results to.
   * @throws IOException if the hits cannot be obtained or the file cannot be
   * written.
   */
  public static void dumpResultsToFile(QueryExecutor executor, File file) throws IOException{
    Writer writer = null;
    try{
      writer = new BufferedWriter(new FileWriter(file));
      writer.write("Doc ID, Position, Length\n");
      List<String> lines = new ArrayList<String>();
      long docId = executor.nextDocument(-1);
      while(docId  != -1){
        Binding aHit = executor.nextHit();
        while(aHit != null){
          lines.add(aHit.getDocumentId() + ", " + 
                  aHit.getTermPosition() + ", " + 
                  aHit.getLength());
          aHit = executor.nextHit();
        }
        //we have all the hits on a document
        Collections.sort(lines);
        for(String line : lines){
          writer.write(line);
          writer.write("\n");
        }
        //start afresh for the next document, otherwise the hits of all the
        //previous documents would be written out again
        lines.clear();
        docId = executor.nextDocument(-1);
      }
    }finally{
      try{
        executor.close();
      }finally{
        if(writer != null) writer.close();
      }
    }
  }


  /**
   * Creates a textual representation for a diff result. The output has a 
   * section for the hits found only in the left query, a separator line, and
   * a section for the hits found only in the right query. Each hit is shown 
   * on one line, as its document ID, term position and length, followed by 
   * the hit text obtained from the query engine.
   * 
   * @param diff a two-element array, containing the hits only found in the
   * left query, and the hits only found in the right query. A 
   * <code>null</code> value (or a <code>null</code> element) is treated as 
   * having no hits, so that the output of a diff that found no differences
   * can also be printed. 
   * @param engine the query engine used to obtain the text for each hit.
   * @return the textual representation.
   * @throws IndexException if the hit text cannot be obtained from the engine.
   */
  public static String printDiffResults(List<Binding>[] diff, 
          QueryEngine engine) throws IndexException{
    StringBuilder diffStr = new StringBuilder();
    diffStr.append("Only in LEFT Query\n");
    appendHitDescriptions(diffStr, diff == null ? null : diff[0], engine);
    diffStr.append("<<<<<<<<<<<<<<!>>>>>>>>>>>>>>>>>\n");
    diffStr.append("Only in RIGHT Query\n");
    appendHitDescriptions(diffStr, diff == null ? null : diff[1], engine);
    return diffStr.toString();
  }
  
  /**
   * Appends one line for each of the given hits to the output; does nothing 
   * if the list of hits is <code>null</code>. 
   */
  private static void appendHitDescriptions(StringBuilder out, 
          List<Binding> hits, QueryEngine engine) throws IndexException{
    if(hits == null) return;
    for(Binding aHit : hits){
      out.append("Document " + aHit.getDocumentId() + 
              "(" + aHit.getTermPosition() + ", " + aHit.getLength() + "): ");
      //first row is appended as the words, second row as the text following
      //each word
      String[][] hitText = engine.getHitText(aHit, 0,0);
      for(int i = 0; i < hitText[0].length; i++){
        String word = hitText[0][i];
        //the second row may be shorter than the first
        String nonWord = i < hitText[1].length ? hitText[1][i] : "";
        out.append(word == null ? "" : word );
        out.append(nonWord == null ? "" : nonWord);
      }
      out.append('\n');
    }
  }
  
  /**
   * Deletes a file, or a directory and all its contents, recursively. Use 
   * with caution!
   * 
   * The deletion is best-effort: if some of the contents cannot be deleted, 
   * the method still attempts to delete everything else. 
   * 
   * @param directory the directory (or plain file) to be deleted.
   * @return <code>true</code> if the directory and all its contents were 
   * deleted successfully.
   */
  public static boolean deleteDir(File directory){
    boolean success = true;
    if(directory.isDirectory()){
      //listFiles() returns null if the directory cannot be listed (e.g. an 
      //I/O error); in that case we still attempt to delete the directory 
      //itself and let the outcome of that decide the result
      File[] children = directory.listFiles();
      if(children != null){
        for(File aChild : children){
          //the recursive call handles plain files as well as directories
          success &= deleteDir(aChild);
        }
      }
    }
    //by now a directory should be empty
    success &= directory.delete();
    return success;
  }
  
}