/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * See LICENSE.txt included in this distribution for the specific * language governing permissions and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at LICENSE.txt. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. */ package org.opensolaris.opengrok.analysis; import java.io.IOException; import java.io.StringReader; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; import java.util.Iterator; import junit.framework.Test; import junit.framework.TestCase; import junit.framework.TestSuite; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; /** * external tests, need to have test-framework on the path this will do a sanity * test on analyzers/tokenizers if they follow latest lucene asserts * * on compile test cp there needs to be lucene-test-framework, lucene-codecs and * randomizedtesting-runner on test src path there can be then the whole * test-framework/src from lucene * * @author Lubos Kosco */ public class LuceneCompatibilityTest extends TestCase { //TODO use reflection to init tests in case LUCENE_TEST_CLASS is present, // create the object out of it and call it's methods public LuceneCompatibilityTest() { super(); } private static final String LUCENE_TEST_CLASS = "org.apache.lucene.analysis.BaseTokenStreamTestCase"; private static final String LUCENE_TEST_METHOD = "assertTokenStreamContents"; private static final String LUCENE_DEP = "com.carrotsearch.randomizedtesting.RandomizedTest"; /** * Create a suite of tests to run. If the lucene test-framework classes are * not present, skip this test. * * @return tests to run */ public static Test suite() { try { Class.forName(LUCENE_DEP); Class.forName(LUCENE_TEST_CLASS); return new TestSuite(LuceneCompatibilityTest.class); } catch (ClassNotFoundException e) { return new TestSuite("LuceneCompatibility - empty (no external lucene test framework on classpath)"); } } Analyzer testA; AnalyzerGuru guru; Method testM; Object testC = null; /** * Set up the test environment with repositories and a cache instance. */ @Override protected void setUp() throws Exception { guru = new AnalyzerGuru(); Class<?> c = Class.forName(LUCENE_TEST_CLASS); //testC = c.newInstance(); //this is static call Class[] argTypes = new Class[]{TokenStream.class, String[].class, int[].class, int[].class, String[].class, int[].class, int[].class, Integer.class, boolean.class}; testM = c.getDeclaredMethod(LUCENE_TEST_METHOD, argTypes); } @Override protected void tearDown() throws Exception { } public void testCompatibility() throws Exception, IOException, IllegalAccessException, IllegalArgumentException { for (Iterator it = guru.getAnalyzerFactories().iterator(); it.hasNext();) { FileAnalyzerFactory fa = (FileAnalyzerFactory) it.next(); String input = "Hello world"; String[] output = new String[]{"Hello", "world"}; testA = fa.getAnalyzer(); String name = testA.getClass().getName(); //below analyzers have no refs // !!!!!!!!!!!!!!!!!!!! // below will fail for some analyzers because of the way how we // deal with data - we don't use the reader, but cache the whole // file instead inside "content" buffer (which is reused for xref) // !!!!!!!!!!!!!!!!!!!! try { if (!name.endsWith("FileAnalyzer") && !name.endsWith("BZip2Analyzer") && !name.endsWith("GZIPAnalyzer") && !name.endsWith("XMLAnalyzer") && !name.endsWith("TroffAnalyzer") && !name.endsWith("ELFAnalyzer") && !name.endsWith("JavaClassAnalyzer") && !name.endsWith("JarAnalyzer") && !name.endsWith("ZipAnalyzer") //TODO below php and fortran analyzers have some problems with dummy input and asserts fail, // analyzers should properly set the tokens in case of wrongly formulated input && !name.endsWith("TarAnalyzer") && !name.endsWith("PhpAnalyzer") && !name.endsWith("FortranAnalyzer")) { System.out.println("Testing refs with " + name); //BaseTokenStreamTestCase.assertTokenStreamContents(testA.tokenStream("refs", new StringReader(input)), output, null, null, null, null, null, input.length()); testM.invoke(testC, testA.tokenStream("refs", new StringReader(input)), output, null, null, null, null, null, input.length(), true); } output = new String[]{"hello", "world"}; //below analyzers have no full, they just wrap data inside them if (!name.endsWith("FileAnalyzer") && !name.endsWith("BZip2Analyzer") && !name.endsWith("GZIPAnalyzer")) { System.out.println("Testing full with " + name); //BaseTokenStreamTestCase.assertTokenStreamContents(testA.tokenStream("full", new StringReader(input)), output, null, null, null, null, null, input.length()); testM.invoke(testC, testA.tokenStream("full", new StringReader(input)), output, null, null, null, null, null, input.length(), true); } } catch (InvocationTargetException x) { Throwable cause = x.getCause(); System.err.println(name + " failed: " + cause.getMessage() + " from " + LUCENE_TEST_CLASS + ":" + LUCENE_TEST_METHOD); throw (new Exception(cause)); } } } }