/*
* ModeShape (http://www.modeshape.org)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.modeshape.test.integration;
import static org.junit.Assert.assertEquals;
import static org.modeshape.jcr.ValidateQuery.validateQuery;
import java.io.File;
import java.io.InputStream;
import javax.annotation.Resource;
import javax.jcr.Node;
import javax.jcr.Session;
import javax.jcr.query.Query;
import org.jboss.arquillian.container.test.api.Deployment;
import org.jboss.arquillian.junit.Arquillian;
import org.jboss.shrinkwrap.api.ArchivePaths;
import org.jboss.shrinkwrap.api.ShrinkWrap;
import org.jboss.shrinkwrap.api.asset.EmptyAsset;
import org.jboss.shrinkwrap.api.spec.WebArchive;
import org.jboss.shrinkwrap.resolver.api.maven.Maven;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.modeshape.common.FixFor;
import org.modeshape.jcr.JcrRepository;
import org.modeshape.jcr.api.JcrTools;
import org.modeshape.jcr.query.JcrQuery;
/**
 * Arquillian test which checks that text-extraction works in an AS7 container.
 * <p>
 * Each test uploads a file from the {@code text-extractor} test resources into the repository, checks the
 * {@code jcr:mimeType} stored on the resulting {@code jcr:content} node and then runs a JCR-SQL2 full-text query which is
 * expected to return that node using the {@code textFromFiles} index.
 * </p>
 *
 * @author Horia Chiorean
 */
@RunWith( Arquillian.class )
public class TikaTextExtractorIntegrationTest {

    /** Absolute path of the node under which every test uploads its file; removed again after each test. */
    private static final String TEST_ROOT_PATH = "/text-extractor";

    private final JcrTools jcrTools = new JcrTools();
    private Session session;

    /**
     * Builds the web archive that is deployed for this test: the {@code modeshape-jcr} test-jar (resolved offline through
     * {@code pom.xml}, with its transitive dependencies) as libraries, an empty {@code beans.xml}, the
     * {@code text-extractor} test resources and the web application's manifest.
     *
     * @return the archive to deploy; never null
     */
    @Deployment
    public static WebArchive createDeployment() {
        File[] testDeps = Maven.configureResolver()
                               .workOffline()
                               .loadPomFromFile("pom.xml")
                               .resolve("org.modeshape:modeshape-jcr:test-jar:tests:?")
                               .withTransitivity()
                               .asFile();
        return ShrinkWrap.create(WebArchive.class, "tika-extractor-test.war")
                         .addAsLibraries(testDeps)
                         .addAsWebInfResource(EmptyAsset.INSTANCE, ArchivePaths.create("beans.xml"))
                         .addAsResource(new File("src/test/resources/text-extractor"))
                         .setManifest(new File("src/main/webapp/META-INF/MANIFEST.MF"));
    }

    @Resource( mappedName = "/jcr/query" )
    private JcrRepository repository;

    /**
     * Logs into the {@code default} workspace of the injected repository before each test.
     *
     * @throws Exception if the login fails
     */
    @Before
    public void beforeEach() throws Exception {
        session = repository.login("default");
    }

    /**
     * Removes the content uploaded by the test (if any) and always logs the session out.
     *
     * @throws Exception if the cleanup fails
     */
    @After
    public void afterEach() throws Exception {
        if (session == null) {
            // the login in beforeEach() failed, so there is nothing to clean up
            return;
        }
        try {
            // Session.getNode() throws PathNotFoundException for a missing node instead of returning null, so the
            // existence has to be checked explicitly; otherwise a test which failed before saving its upload would
            // have its real failure hidden by an exception from this cleanup
            if (session.nodeExists(TEST_ROOT_PATH)) {
                session.getNode(TEST_ROOT_PATH).remove();
                session.save();
            }
        } finally {
            session.logout();
        }
    }

    @Test
    public void shouldExtractAndIndexContentFromPlainTextFile() throws Exception {
        String queryString = "select [jcr:path] from [nt:resource] as res where contains(res.*, 'The Quick')";
        uploadFileAndCheckExtraction("text-extractor/text-file.txt", "text/plain", queryString);
    }

    @Test
    public void shouldExtractAndIndexContentFromDocFile() throws Exception {
        String queryString = "select [jcr:path] from [nt:resource] as res where contains(res.*, 'ModeShape supports')";
        // NOTE(review): the file is uploaded at "/text-extractor/modeshape.doc", i.e. the node name keeps the ".doc"
        // extension, so this test on its own does not show that the mime-type detection is content based
        uploadFileAndCheckExtraction("text-extractor/modeshape.doc", "application/msword", queryString);
    }

    @Test
    @FixFor( "MODE-1810" )
    public void shouldExtractAndIndexContentFromXlsxFile() throws Exception {
        String queryString = "select [jcr:path] from [nt:resource] as res where contains(res.*, 'Operations')";
        uploadFileAndCheckExtraction("text-extractor/sample-file.xlsx",
                                     "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                                     queryString);
    }

    /**
     * Uploads the given classpath resource at the absolute path {@code "/" + filepath}, then verifies the stored mime-type
     * and that the supplied full-text query finds the uploaded {@code jcr:content} node.
     *
     * @param filepath the classpath location of the file to upload, also used (prefixed with "/") as the node path
     * @param expectedMimeType the value expected in the {@code jcr:mimeType} property of the {@code jcr:content} node
     * @param validationQuery the JCR-SQL2 query which must return exactly the {@code jcr:content} node of the upload
     * @throws Exception if the upload, the query or any of the validations fail
     */
    private void uploadFileAndCheckExtraction(String filepath,
                                              String expectedMimeType,
                                              String validationQuery) throws Exception {
        String nodePath = "/" + filepath;

        // this will create jcr:content of type nt:resource with the jcr:data property
        InputStream content = getResource(filepath);
        try {
            jcrTools.uploadFile(session, nodePath, content);
        } finally {
            // make sure the stream is released even if the upload fails; closing an already closed stream has no effect
            content.close();
        }
        session.save();

        // wait a bit to make sure the text extraction has happened
        // NOTE(review): this is a fixed delay, so it assumes extraction and indexing complete within one second
        Thread.sleep(1000);

        String mimeType = session.getNode(nodePath).getNode("jcr:content").getProperty("jcr:mimeType").getString();
        assertEquals("Expected mime-type has not been detected", expectedMimeType, mimeType);

        Query query = session.getWorkspace().getQueryManager().createQuery(validationQuery, JcrQuery.JCR_SQL2);
        validateQuery().hasNodesAtPaths(nodePath + "/jcr:content").useIndex("textFromFiles").validate(query, query.execute());
    }

    /**
     * Opens the given resource through this class' class loader.
     *
     * @param path the classpath-relative location of the resource
     * @return an open stream on the resource; never null
     * @throws IllegalStateException if the resource cannot be found on the classpath
     */
    private InputStream getResource(String path) {
        InputStream stream = getClass().getClassLoader().getResourceAsStream(path);
        if (stream == null) {
            // fail here with the resource name rather than later inside the upload with an unrelated message
            throw new IllegalStateException("Test resource not found on the classpath: " + path);
        }
        return stream;
    }
}