/* * Copyright 2010 * Ubiquitous Knowledge Processing (UKP) Lab * Technische Universität Darmstadt * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package de.tudarmstadt.ukp.dkpro.core.api.io; import static org.apache.uima.fit.factory.CollectionReaderFactory.createReader; import static org.apache.uima.fit.factory.ExternalResourceFactory.createExternalResourceDescription; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import java.io.File; import java.io.IOException; import org.apache.uima.cas.CAS; import org.apache.uima.collection.CollectionException; import org.apache.uima.collection.CollectionReader; import org.apache.uima.resource.ExternalResourceDescription; import org.apache.uima.resource.ResourceInitializationException; import org.apache.uima.util.CasCreationUtils; import org.junit.BeforeClass; import org.junit.Ignore; import org.junit.Test; import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData; public class ResourceCollectionReaderBaseTest { @BeforeClass public static void before() { // Route logging through log4j System.setProperty("org.apache.uima.logger.class", "org.apache.uima.util.impl.Log4jLogger_impl"); } @Test public void testClasspath() throws Exception { CollectionReader reader = createReader(DummyReader.class, ResourceCollectionReaderBase.PARAM_SOURCE_LOCATION, "classpath*:/de/tudarmstadt/ukp/", ResourceCollectionReaderBase.PARAM_PATTERNS, new String[] { "[+]**/FileSetCollectionReaderBase.class", "[-]**/ResourceCollectionReaderBase.class" }); searchForResourceCollectionReaderBase(reader); } @Test public void testZip() throws Exception { CollectionReader reader = createReader(DummyReader.class, ResourceCollectionReaderBase.PARAM_SOURCE_LOCATION, "jar:file:src/test/resources/testfiles.zip!", ResourceCollectionReaderBase.PARAM_PATTERNS, new String[] { "[+]**/FileSetCollectionReaderBase.class", "[-]**/ResourceCollectionReaderBase.class" }); searchForResourceCollectionReaderBase(reader); } @Test public void testZip2() throws Exception { CollectionReader reader = createReader(DummyReader.class, ResourceCollectionReaderBase.PARAM_SOURCE_LOCATION, "jar:file:src/test/resources/testfiles.zip", ResourceCollectionReaderBase.PARAM_PATTERNS, new String[] { "[+]**/FileSetCollectionReaderBase.class", "[-]**/ResourceCollectionReaderBase.class" }); searchForResourceCollectionReaderBase(reader); } @Test public void testZip3() throws Exception { CollectionReader reader = createReader(DummyReader.class, ResourceCollectionReaderBase.PARAM_SOURCE_LOCATION, "jar:file:src/test/resources/testfiles.zip", ResourceCollectionReaderBase.PARAM_PATTERNS, new String[] { "[+]**/FileSetCollectionReaderBase.class", "[-]test*/ResourceCollectionReaderBase.class" }); searchForResourceCollectionReaderBase(reader); } @Test public void testZip4() throws Exception { String path = "jar:file:" + new File("src/test/resources/testfiles.zip").getAbsolutePath(); CollectionReader reader = createReader(DummyReader.class, ResourceCollectionReaderBase.PARAM_SOURCE_LOCATION, path, ResourceCollectionReaderBase.PARAM_PATTERNS, new String[] { "[+]**/FileSetCollectionReaderBase.class", "[-]test*/ResourceCollectionReaderBase.class" }); searchForResourceCollectionReaderBase(reader); } @Test public void testZipNoPattern() throws Exception { CollectionReader reader = createReader(DummyReader.class, ResourceCollectionReaderBase.PARAM_SOURCE_LOCATION, "jar:file:src/test/resources/testfiles.zip!/testfiles/FileSetCollectionReaderBase.class"); searchForResourceCollectionReaderBase(reader); } @Ignore("This does not work because the underlying Spring component does not do remote.") @Test public void testRemoteZip() throws Exception { CollectionReader reader = createReader( DummyReader.class, ResourceCollectionReaderBase.PARAM_SOURCE_LOCATION, "jar:http://search.maven.org/remotecontent?filepath=org/annolab/tt4j/org.annolab.tt4j/1.0.16/org.annolab.tt4j-1.0.16.jar!", ResourceCollectionReaderBase.PARAM_PATTERNS, new String[] { "[+]**/FileSetCollectionReaderBase.class", "[-]**/ResourceCollectionReaderBase.class" }); searchForResourceCollectionReaderBase(reader); } @Test public void testFile() throws Exception { CollectionReader reader = createReader(DummyReader.class, ResourceCollectionReaderBase.PARAM_SOURCE_LOCATION, "file:src/main/java/de/tudarmstadt/ukp/", ResourceCollectionReaderBase.PARAM_PATTERNS, new String[] { "[+]**/FileSetCollectionReaderBase.java", "[-]**/ResourceCollectionReaderBase.java" }); searchForResourceCollectionReaderBase(reader); } @Test public void testFileNoPrefix() throws Exception { CollectionReader reader = createReader(DummyReader.class, ResourceCollectionReaderBase.PARAM_SOURCE_LOCATION, "file:src/main/java/de/tudarmstadt/ukp/", ResourceCollectionReaderBase.PARAM_PATTERNS, new String[] { "**/FileSetCollectionReaderBase.java", "[-]**/ResourceCollectionReaderBase.java" }); searchForResourceCollectionReaderBase(reader); } @Test public void testFileNoPattern1() throws Exception { CollectionReader reader = createReader(DummyReader.class, ResourceCollectionReaderBase.PARAM_SOURCE_LOCATION, "file:src/main/java/de/tudarmstadt/ukp/**/FileSetCollectionReaderBase.java"); searchForResourceCollectionReaderBase(reader); } @Test public void testFileNoPattern2() throws Exception { CollectionReader reader = createReader(DummyReader.class, ResourceCollectionReaderBase.PARAM_SOURCE_LOCATION, "file:src/main/java/de/tudarmstadt/ukp/dkpro/core/api/io/FileSetCollectionReaderBase.java"); searchForResourceCollectionReaderBase(reader); } @Test public void testFileNoPattern3() throws Exception { CollectionReader reader = createReader(DummyReader.class, ResourceCollectionReaderBase.PARAM_SOURCE_LOCATION, "s*/main/java/de/tudarmstadt/ukp/dkpro/core/api/io/FileSetCollectionReaderBase.java"); searchForResourceCollectionReaderBase(reader); } @Test public void testFileNoPattern4() throws Exception { CollectionReader reader = createReader(DummyReader.class, ResourceCollectionReaderBase.PARAM_SOURCE_LOCATION, "file:s*/main/java/de/tudarmstadt/ukp/dkpro/core/api/io/FileSetCollectionReaderBase.java"); searchForResourceCollectionReaderBase(reader); } @Test public void testFileNoSource() throws Exception { CollectionReader reader = createReader(DummyReader.class, ResourceCollectionReaderBase.PARAM_PATTERNS, "src/main/java/de/tudarmstadt/ukp/**/FileSetCollectionReaderBase.java"); searchForResourceCollectionReaderBase(reader); } @Test(expected = ResourceInitializationException.class) public void testBrokenPattern() throws Exception { CollectionReader reader = createReader(DummyReader.class, ResourceCollectionReaderBase.PARAM_SOURCE_LOCATION, "file:src/main/java/de/tudarmstadt/ukp/", ResourceCollectionReaderBase.PARAM_PATTERNS, new String[] { "[?]**/FileSetCollectionReaderBase.java" }); searchForResourceCollectionReaderBase(reader); } @Test public void testExternalLoaderLocator() throws Exception { ExternalResourceDescription locator = createExternalResourceDescription(ResourceLoaderLocator.class); CollectionReader reader = createReader(DummyReader.class, ResourceCollectionReaderBase.PARAM_SOURCE_LOCATION, "file:src/main/java/de/tudarmstadt/ukp/", ResourceCollectionReaderBase.PARAM_PATTERNS, new String[] { "[+]**/FileSetCollectionReaderBase.java", "[-]**/ResourceCollectionReaderBase.java" }, ResourceCollectionReaderBase.KEY_RESOURCE_RESOLVER, locator); searchForResourceCollectionReaderBase(reader); } public void searchForResourceCollectionReaderBase(CollectionReader aReader) throws Exception { String goodNeedle = "FileSetCollectionReaderBase"; String badNeedle = "ResourceCollectionReaderBase"; boolean found = false; CAS cas = CasCreationUtils.createCas(aReader.getProcessingResourceMetaData()); while (aReader.hasNext()) { aReader.getNext(cas); DocumentMetaData meta = DocumentMetaData.get(cas); System.out.printf("Found : [%s]%n", meta.getDocumentUri()); System.out.printf(" Base : [%s]%n", meta.getDocumentBaseUri()); System.out.printf(" ColID: [%s]%n", meta.getCollectionId()); System.out.printf(" DocID: [%s]%n", meta.getDocumentId()); System.out.println(); assertTrue(meta.getDocumentBaseUri().length() == 0 || meta.getDocumentBaseUri().endsWith("/")); if (meta.getDocumentUri().contains(goodNeedle)) { found = true; break; } if (meta.getDocumentUri().contains(badNeedle)) { fail("Bad needle [" + badNeedle + "] found even though it is excluded..."); } cas.reset(); } cas.release(); assertTrue("Good needle [" + goodNeedle + "] not found...", found); } public static final class DummyReader extends ResourceCollectionReaderBase { @Override public void getNext(CAS aCAS) throws IOException, CollectionException { Resource res = nextFile(); initCas(aCAS, res); } @Override public void close() throws IOException { // Ignore } } }