/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.tika.parser.microsoft; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import java.net.URL; import org.apache.tika.TikaTest; import org.apache.tika.extractor.ContainerExtractor; import org.apache.tika.io.TikaInputStream; import org.apache.tika.mime.MediaType; /** * Parent class of tests that the various POI powered parsers are * able to extract their embedded contents. */ public abstract class AbstractPOIContainerExtractionTest extends TikaTest { public static final MediaType TYPE_DOC = MediaType.application("msword"); public static final MediaType TYPE_PPT = MediaType.application("vnd.ms-powerpoint"); public static final MediaType TYPE_XLS = MediaType.application("vnd.ms-excel"); public static final MediaType TYPE_DOCX = MediaType.application("vnd.openxmlformats-officedocument.wordprocessingml.document"); public static final MediaType TYPE_PPTX = MediaType.application("vnd.openxmlformats-officedocument.presentationml.presentation"); public static final MediaType TYPE_XLSX = MediaType.application("vnd.openxmlformats-officedocument.spreadsheetml.sheet"); public static final MediaType TYPE_MSG = MediaType.application("vnd.ms-outlook"); public static final MediaType TYPE_TXT = MediaType.text("plain"); public static final MediaType TYPE_PDF = MediaType.application("pdf"); public static final MediaType TYPE_JPG = MediaType.image("jpeg"); public static final MediaType TYPE_GIF = MediaType.image("gif"); public static final MediaType TYPE_PNG = MediaType.image("png"); public static final MediaType TYPE_EMF = MediaType.image("emf"); public static final MediaType TYPE_WMF = MediaType.image("wmf"); protected static TikaInputStream getTestFile(String filename) throws Exception { URL input = AbstractPOIContainerExtractionTest.class.getResource( "/test-documents/" + filename); assertNotNull(filename + " not found", input); return TikaInputStream.get(input); } protected TrackingHandler process(String filename, ContainerExtractor extractor, boolean recurse) throws Exception { try (TikaInputStream stream = getTestFile(filename)) { assertEquals(true, extractor.isSupported(stream)); // Process it TrackingHandler handler = new TrackingHandler(); if (recurse) { extractor.extract(stream, extractor, handler); } else { extractor.extract(stream, null, handler); } // So they can check what happened return handler; } } }