/**
*
*/
package eu.scape_project.pc.cc.nanite.tika;
import static org.junit.Assert.*;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import javax.activation.MimeTypeParseException;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.CompositeParser;
import org.apache.tika.parser.ParseContext;
import org.junit.Before;
import org.junit.Test;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import uk.bl.wa.tika.PreservationParser;
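/**
* Tests that {@link uk.bl.wa.tika.PreservationParser} records the expected
* extended MIME type in the Tika metadata for a set of sample documents
* loaded from the test class path.
*
* @author Andrew Jackson <Andrew.Jackson@bl.uk>
*
*/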
public class PreservationParserTest {

    /** Parser under test; re-created before every test method by {@link #setUp()}. */
    CompositeParser parser = null;

    /**
     * Creates a fresh {@link PreservationParser} for each test.
     *
     * @throws java.lang.Exception if the parser cannot be constructed
     */
    @Before
    public void setUp() throws Exception {
        parser = new PreservationParser();
    }

    /**
     * Test method for {@link uk.bl.wa.tika.PreservationParser#parse(java.io.InputStream, org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata, org.apache.tika.parser.ParseContext)}.
     * <p>
     * Parses each sample document and checks the extended MIME type that the
     * parser stores in the metadata against the expected value.
     *
     * @throws IOException if a sample document cannot be read or closed
     * @throws SAXException if the parser reports a SAX error
     * @throws TikaException if the parser fails to process a document
     * @throws MimeTypeParseException declared for signature compatibility;
     *         nothing visible in this class throws it
     */
    @Test
    public void testParseInputStreamContentHandlerMetadataParseContext() throws IOException, SAXException, TikaException, MimeTypeParseException {
        // PDFs
        this.testExtendedMIMEType( "/simple.pdf",
                "application/pdf; version=\"1.4\"; software=\"OpenOffice.org 3.2\"; source=Writer");
        this.testExtendedMIMEType( "/simple-PDFA-1a.pdf",
                "application/pdf; version=\"A-1a\"; software=\"OpenOffice.org 3.2\"; source=Writer");
        this.testExtendedMIMEType( "/simple-password-nocopy.pdf",
                "application/pdf; version=\"1.4\"; software=\"OpenOffice.org 3.2\"; source=Writer");
        // ODT
        this.testExtendedMIMEType( "/simple.odt",
                "application/vnd.oasis.opendocument.text; software=\"OpenOffice.org\\/3.2$Win32 OpenOffice.org_project\\/320m12$Build-9483\"");
        // The PNG and JPEG cases below are disabled: they point at absolute
        // paths on a developer machine rather than at class-path resources.
        // PNG
        //this.testExtendedMIMEType( "/Users/andy/Documents/workspace/nanite/nanite-tika/src/test/resources/variatio-ipsius/apple-pages-09-4.1-923/convert-6.7.5-7/test.png",
        //		"image/png; software=\"ImageMagick\"");
        // JPEG
        //this.testExtendedMIMEType( "/Users/andy/Documents/workspace/tika/tika-parsers/src/test/resources/test-documents/testJPEG_EXIF.jpg",
        //		"image/jpeg; software=\"Adobe Photoshop CS3 Macintosh\"; hardware=\"Canon EOS 40D\"");
        // TIFF?
    }

    /**
     * Parses one class-path resource and asserts that the extended MIME type
     * recorded under {@link PreservationParser#EXT_MIME_TYPE} equals the
     * expected one. Both values are compared as parsed {@link MediaType}s
     * rather than as raw strings. All metadata entries are also printed to
     * standard output, sorted by name, for inspection.
     *
     * @param filename class-path location of the sample document
     * @param expected the extended MIME type string the parser should report
     * @throws IOException if the document cannot be read or closed
     * @throws SAXException if the parser reports a SAX error
     * @throws TikaException if the parser fails to process the document
     * @throws MimeTypeParseException declared for signature compatibility;
     *         nothing visible in this method throws it
     */
    private void testExtendedMIMEType(String filename, String expected ) throws IOException, SAXException, TikaException, MimeTypeParseException {
        // Get the source file off the TEST class path; a missing resource
        // yields null, so fail with a clear message instead of passing null on.
        InputStream input = getClass().getResourceAsStream(filename);
        assertNotNull("Test resource not found on the class path: " + filename, input);

        Metadata metadata = new Metadata();
        try {
            parser.parse(input, new DefaultHandler(), metadata, new ParseContext());
        } finally {
            // Release the stream even when parsing throws.
            input.close();
        }

        // Report all metadata, for interest
        System.out.println("Metadata for: "+filename);
        String[] names = metadata.names();
        Arrays.sort(names);
        for( String key : names ) {
            System.out.println( (key+" : "+metadata.get(key)) );
        }
        System.out.println("----");

        // Recover the extended MIME Type:
        String extendedType = metadata.get(PreservationParser.EXT_MIME_TYPE);
        assertNotNull("No extended MIME type recorded for: " + filename, extendedType);
        MediaType tikaType = MediaType.parse(extendedType);

        // Assert equality:
        assertEquals("Extended MIME type mismatch for: " + filename,
                MediaType.parse( expected ), tikaType );
    }
}