/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.server;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import au.com.bytecode.opencsv.CSVReader;
import org.apache.cxf.helpers.IOUtils;
import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
import org.apache.cxf.jaxrs.client.WebClient;
import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.serialization.JsonMetadata;
import org.apache.tika.server.resource.MetadataResource;
import org.apache.tika.server.writer.CSVMessageBodyWriter;
import org.apache.tika.server.writer.JSONMessageBodyWriter;
import org.apache.tika.server.writer.TextMessageBodyWriter;
import org.apache.tika.server.writer.XMPMessageBodyWriter;
import org.junit.Assert;
import org.junit.Test;
public class MetadataResourceTest extends CXFTestBase {
private static final String META_PATH = "/meta";
@Override
protected void setUpResources(JAXRSServerFactoryBean sf) {
sf.setResourceClasses(MetadataResource.class);
sf.setResourceProvider(MetadataResource.class,
new SingletonResourceProvider(new MetadataResource()));
}
@Override
protected void setUpProviders(JAXRSServerFactoryBean sf) {
List<Object> providers = new ArrayList<Object>();
providers.add(new JSONMessageBodyWriter());
providers.add(new CSVMessageBodyWriter());
providers.add(new XMPMessageBodyWriter());
providers.add(new TextMessageBodyWriter());
sf.setProviders(providers);
}
@Test
public void testSimpleWord() throws Exception {
Response response = WebClient
.create(endPoint + META_PATH)
.type("application/msword")
.accept("text/csv")
.put(ClassLoader
.getSystemResourceAsStream(TikaResourceTest.TEST_DOC));
Reader reader = new InputStreamReader((InputStream) response.getEntity(), UTF_8);
CSVReader csvReader = new CSVReader(reader);
Map<String, String> metadata = new HashMap<String, String>();
String[] nextLine;
while ((nextLine = csvReader.readNext()) != null) {
metadata.put(nextLine[0], nextLine[1]);
}
csvReader.close();
assertNotNull(metadata.get("Author"));
assertEquals("Maxim Valyanskiy", metadata.get("Author"));
assertEquals("X-TIKA:digest:MD5", "f8be45c34e8919eedba48cc8d207fbf0",
metadata.get("X-TIKA:digest:MD5"));
}
@Test
public void testPasswordProtected() throws Exception {
Response response = WebClient
.create(endPoint + META_PATH)
.type("application/vnd.ms-excel")
.accept("text/csv")
.put(ClassLoader
.getSystemResourceAsStream(TikaResourceTest.TEST_PASSWORD_PROTECTED));
// Won't work, no password given
assertEquals(500, response.getStatus());
// Try again, this time with the wrong password
response = WebClient
.create(endPoint + META_PATH)
.type("application/vnd.ms-excel")
.accept("text/csv")
.header("Password", "wrong password")
.put(ClassLoader.getSystemResourceAsStream(TikaResourceTest.TEST_PASSWORD_PROTECTED));
assertEquals(500, response.getStatus());
// Try again, this time with the password
response = WebClient
.create(endPoint + META_PATH)
.type("application/vnd.ms-excel")
.accept("text/csv")
.header("Password", "password")
.put(ClassLoader.getSystemResourceAsStream(TikaResourceTest.TEST_PASSWORD_PROTECTED));
// Will work
assertEquals(200, response.getStatus());
// Check results
Reader reader = new InputStreamReader((InputStream) response.getEntity(), UTF_8);
CSVReader csvReader = new CSVReader(reader);
Map<String, String> metadata = new HashMap<String, String>();
String[] nextLine;
while ((nextLine = csvReader.readNext()) != null) {
metadata.put(nextLine[0], nextLine[1]);
}
csvReader.close();
assertNotNull(metadata.get("Author"));
assertEquals("pavel", metadata.get("Author"));
}
@Test
public void testJSON() throws Exception {
Response response = WebClient
.create(endPoint + META_PATH)
.type("application/msword")
.accept("application/json")
.put(ClassLoader
.getSystemResourceAsStream(TikaResourceTest.TEST_DOC));
Reader reader = new InputStreamReader((InputStream) response.getEntity(), UTF_8);
Metadata metadata = JsonMetadata.fromJson(reader);
assertNotNull(metadata.get("Author"));
assertEquals("Maxim Valyanskiy", metadata.get("Author"));
}
@Test
public void testXMP() throws Exception {
Response response = WebClient
.create(endPoint + META_PATH)
.type("application/msword")
.accept("application/rdf+xml")
.put(ClassLoader
.getSystemResourceAsStream(TikaResourceTest.TEST_DOC));
String result = IOUtils.readStringFromStream((InputStream) response.getEntity());
assertContains("<rdf:li>Maxim Valyanskiy</rdf:li>", result);
}
//Now test requesting one field
@Test
public void testGetField_XXX_NotFound() throws Exception {
Response response = WebClient.create(endPoint + META_PATH + "/xxx").type("application/msword")
.accept(MediaType.APPLICATION_JSON).put(ClassLoader.getSystemResourceAsStream(TikaResourceTest.TEST_DOC));
Assert.assertEquals(Response.Status.NOT_FOUND.getStatusCode(), response.getStatus());
}
@Test
public void testGetField_Author_TEXT_Partial_BAD_REQUEST() throws Exception {
InputStream stream = ClassLoader.getSystemResourceAsStream(TikaResourceTest.TEST_DOC);
Response response = WebClient.create(endPoint + META_PATH + "/Author").type("application/msword")
.accept(MediaType.TEXT_PLAIN).put(copy(stream, 8000));
Assert.assertEquals(Response.Status.BAD_REQUEST.getStatusCode(), response.getStatus());
}
@Test
public void testGetField_Author_TEXT_Partial_Found() throws Exception {
InputStream stream = ClassLoader.getSystemResourceAsStream(TikaResourceTest.TEST_DOC);
Response response = WebClient.create(endPoint + META_PATH + "/Author").type("application/msword")
.accept(MediaType.TEXT_PLAIN).put(copy(stream, 12000));
Assert.assertEquals(Response.Status.OK.getStatusCode(), response.getStatus());
String s = IOUtils.readStringFromStream((InputStream) response.getEntity());
assertEquals("Maxim Valyanskiy", s);
}
@Test
public void testGetField_Author_JSON_Partial_Found() throws Exception {
InputStream stream = ClassLoader.getSystemResourceAsStream(TikaResourceTest.TEST_DOC);
Response response = WebClient.create(endPoint + META_PATH + "/Author").type("application/msword")
.accept(MediaType.APPLICATION_JSON).put(copy(stream, 12000));
Assert.assertEquals(Response.Status.OK.getStatusCode(), response.getStatus());
Metadata metadata = JsonMetadata.fromJson(new InputStreamReader(
(InputStream) response.getEntity(), UTF_8));
assertEquals("Maxim Valyanskiy", metadata.get("Author"));
assertEquals(1, metadata.names().length);
}
@Test
public void testGetField_Author_XMP_Partial_Found() throws Exception {
InputStream stream = ClassLoader.getSystemResourceAsStream(TikaResourceTest.TEST_DOC);
Response response = WebClient.create(endPoint + META_PATH + "/dc:creator").type("application/msword")
.accept("application/rdf+xml").put(copy(stream, 12000));
Assert.assertEquals(Response.Status.OK.getStatusCode(), response.getStatus());
String s = IOUtils.readStringFromStream((InputStream) response.getEntity());
assertContains("<rdf:li>Maxim Valyanskiy</rdf:li>", s);
}
}