/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.camel.component.tika; import java.io.IOException; import java.nio.charset.Charset; import org.xml.sax.SAXException; import org.apache.camel.spi.Metadata; import org.apache.camel.spi.UriParam; import org.apache.camel.spi.UriParams; import org.apache.camel.spi.UriPath; import org.apache.tika.config.TikaConfig; import org.apache.tika.exception.TikaException; @UriParams public class TikaConfiguration { @UriPath(description = "Operation type") @Metadata(required = "true") private TikaOperation operation; @UriParam(defaultValue = "xml") private TikaParseOutputFormat tikaParseOutputFormat = TikaParseOutputFormat.xml; @UriParam(description = "Tika Parse Output Encoding") private String tikaParseOutputEncoding = Charset.defaultCharset().name(); @UriParam(description = "Tika Config") private TikaConfig tikaConfig = TikaConfig.getDefaultConfig(); @UriParam(description = "Tika Config Url") private String tikaConfigUri; public TikaOperation getOperation() { return operation; } /** * * Tika Operation. parse or detect * */ public void setOperation(TikaOperation operation) { this.operation = operation; } public void setOperation(String operation) { this.operation = TikaOperation.valueOf(operation); } public TikaParseOutputFormat getTikaParseOutputFormat() { return tikaParseOutputFormat; } /** * * Tika Output Format. Supported output formats. * <ul> * <li>xml: Returns Parsed Content as XML. </li> * <li>html: Returns Parsed Content as HTML. </li> * <li>text: Returns Parsed Content as Text. </li> * <li>textMain: Uses the <a href="http://code.google.com/p/boilerpipe/">boilerpipe</a> library to automatically extract the main content from a web page. </li> * </ul> * */ public void setTikaParseOutputFormat(TikaParseOutputFormat tikaParseOutputFormat) { this.tikaParseOutputFormat = tikaParseOutputFormat; } public String getTikaParseOutputEncoding() { return tikaParseOutputEncoding; } /** * Tika Parse Output Encoding - Used to specify the character encoding of the parsed output. * Defaults to Charset.defaultCharset() . * */ public void setTikaParseOutputEncoding(String tikaParseOutputEncoding) { this.tikaParseOutputEncoding = tikaParseOutputEncoding; } public TikaConfig getTikaConfig() { return tikaConfig; } /** * * Tika Config * */ public void setTikaConfig(TikaConfig tikaConfig) { this.tikaConfig = tikaConfig; } public String getTikaConfigUri() { return tikaConfigUri; } /** * * Tika Config Uri: The URI of tika-config.xml * */ public void setTikaConfigUri(String tikaConfigUri) throws TikaException, IOException, SAXException { this.tikaConfigUri = tikaConfigUri; this.tikaConfig = new TikaConfig(tikaConfigUri); } }