/**
 * Copyright (c) Codice Foundation
 * <p>
 * This is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser
 * General Public License as published by the Free Software Foundation, either version 3 of the
 * License, or any later version.
 * <p>
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details. A copy of the GNU Lesser General Public License
 * is distributed along with this program and can be found at
 * <http://www.gnu.org/licenses/lgpl.html>.
 */
package ddf.catalog.transformer.common.tika;

import java.io.IOException;
import java.io.InputStream;

import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

import ddf.catalog.transform.CatalogTransformerException;

/**
 * Runs a Tika {@link Parser} over an input stream and returns the {@link Metadata} object that the
 * parser populated.
 * <p>
 * The {@link ContentHandler} supplied at construction time is passed to every
 * {@link #parseMetadata(InputStream, ParseContext)} call made on this instance.
 * NOTE(review): if that handler accumulates state, reusing one extractor for several documents may
 * mix their content - confirm against callers.
 */
public class TikaMetadataExtractor {

    private final Parser parser;

    private final ContentHandler handler;

    /**
     * Creates a new {@code TikaMetadataExtractor} with a {@link Parser} and {@link ContentHandler}.
     *
     * @param parser  the {@code Parser} to use
     * @param handler the {@code ContentHandler} to use with {@code parser}
     */
    public TikaMetadataExtractor(final Parser parser, final ContentHandler handler) {
        this.parser = parser;
        this.handler = handler;
    }

    /**
     * Parses metadata from {@code inputStream} using the supplied {@link Parser},
     * {@link ContentHandler}, and {@link ParseContext}.
     * <p>
     * A fresh {@link Metadata} instance is created for each call. This method does not close
     * {@code inputStream} itself.
     *
     * @param inputStream  the data to parse
     * @param parseContext context information to pass to the {@code Parser}, may be null; when
     *                     null, an empty {@code ParseContext} is passed to the parser instead
     * @return a {@link Metadata} object containing the metadata that the {@code Parser} was able to
     *         extract from {@code inputStream}
     * @throws CatalogTransformerException if the {@code Parser} throws a {@link SAXException} or a
     *                                     {@link TikaException}; the original exception is kept as
     *                                     the cause
     * @throws IOException                 if the {@code Parser} throws one; it is propagated
     *                                     unchanged
     */
    public Metadata parseMetadata(final InputStream inputStream, final ParseContext parseContext)
            throws CatalogTransformerException, IOException {
        // The contract above allows a null context, but the parser receives whatever is passed
        // here and may dereference it, so hand it an empty context rather than null.
        final ParseContext effectiveContext =
                (parseContext != null) ? parseContext : new ParseContext();

        final Metadata metadata = new Metadata();

        try {
            parser.parse(inputStream, handler, metadata, effectiveContext);
        } catch (SAXException | TikaException e) {
            throw new CatalogTransformerException(e);
        }

        return metadata;
    }
}