ExtractingParams.java example

Explorer
solrcene-master
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.extraction;


/**
 * Names of the Solr request parameters (and parameter prefixes) that control
 * content extraction.
 *
 * <p>Every member is a compile-time {@code String} constant. Constants whose
 * name ends in {@code _PREFIX} are not complete parameter names: the caller
 * appends a field name to them (for example {@code fmap.title}).
 */
public interface ExtractingParams {

  /**
   * Map all generated attribute names to field names with lowercase and underscores.
   */
  String LOWERNAMES = "lowernames";

  /**
   * The param prefix for mapping Tika metadata to Solr fields.
   *
   * <p>To map a field, add a parameter such as:
   * <pre>fmap.title=solr.title</pre>
   *
   * In this example, the Tika "title" metadata value will be added to a Solr
   * field named "solr.title".
   */
  String MAP_PREFIX = "fmap.";

  /**
   * The param prefix for the boost value of a field. The boost can be
   * specified by a name mapping.
   *
   * <p>For example:
   * <pre>
   * fmap.title=solr.title
   * boost.solr.title=2.5
   * </pre>
   * will boost the solr.title field for this document by 2.5.
   */
  String BOOST_PREFIX = "boost.";

  /**
   * The param prefix for passing in literal values to be added to the
   * document, as in:
   * <pre>
   * literal.myField=Foo
   * </pre>
   */
  String LITERALS_PREFIX = "literal.";

  /**
   * Restrict the extracted parts of a document to be indexed by passing in an
   * XPath expression. All content that satisfies the XPath expression will be
   * passed to the {@link SolrContentHandler}.
   *
   * <p>See Tika's docs for what the extracted document looks like.
   *
   * @see #CAPTURE_ELEMENTS
   */
  String XPATH_EXPRESSION = "xpath";

  /**
   * Only extract and return the content, do not index it.
   */
  String EXTRACT_ONLY = "extractOnly";

  /**
   * Content output format if extractOnly is true. Default is "xml",
   * alternative is "text".
   */
  String EXTRACT_FORMAT = "extractFormat";

  /**
   * Capture attributes separately according to the name of the element,
   * instead of just adding them to the string buffer.
   */
  String CAPTURE_ATTRIBUTES = "captureAttr";

  /**
   * Capture the specified fields (and everything included below them that
   * isn't captured by some other capture field) separately from the default.
   * This is different than the case of passing in an XPath expression.
   *
   * <p>The capture field is based on the localName returned to the
   * {@link SolrContentHandler} by Tika, not to be confused with the mapped
   * field. The field name can then be mapped into the index schema.
   *
   * <p>For instance, a Tika document may look like:
   * <pre>{@code
   * <html>
   *   ...
   *   <body>
   *     <p>some text here.  <div>more text</div></p>
   *     Some more text
   *   </body>
   * </html>
   * }</pre>
   * By passing in the p tag, you could capture all P tags separately from the
   * rest of the text. Thus, in the example, the capture of the P tag would be:
   * "some text here.  more text"
   */
  String CAPTURE_ELEMENTS = "capture";

  /**
   * The type of the stream. If not specified, Tika will use mime type detection.
   */
  String STREAM_TYPE = "stream.type";

  /**
   * Optional. The file name. If specified, Tika can take this into account
   * while guessing the MIME type.
   */
  String RESOURCE_NAME = "resource.name";

  /**
   * Optional. If specified, the prefix will be prepended to all Metadata, such
   * that it would be possible to set up a dynamic field to automatically
   * capture it.
   */
  String UNKNOWN_FIELD_PREFIX = "uprefix";

  /**
   * Optional. If specified and the name of a potential field cannot be
   * determined, the default field specified will be used instead.
   */
  String DEFAULT_FIELD = "defaultField";
}