/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.cocoon.components.search;
import org.apache.avalon.framework.component.Component;
import org.apache.cocoon.ProcessingException;
import java.net.URL;
import java.util.List;
/**
* The avalon behavioural component interface for generating
* lucene documents from xml content.
*
* <p>
* The well-known fields of a lucene document are defined as
* <code>*_FIELD</code> constants.
* </p>
* <p>
* You may access the generated lucene documents via the
* <code>List</code> returned by <code>build()</code>.
* </p>
* <p>
* You trigger the generating of lucene documents via
* <code>build()</code>.
* </p>
*
* @author <a href="mailto:berni_huber@a1.net">Bernhard Huber</a>
* @version CVS $Id$
*/
public interface LuceneXMLIndexer extends Component {

    /**
     * Role name under which this avalon component is looked up.
     * <p>
     * The value is the fully qualified name of this interface, i.e.
     * <code>org.apache.cocoon.components.search.LuceneXMLIndexer</code>.
     * </p>
     */
    String ROLE = "org.apache.cocoon.components.search.LuceneXMLIndexer";

    /**
     * Name of the lucene document field that holds the text content of
     * all xml elements.
     * <p>
     * Concrete implementations of this interface SHOULD provide a field
     * named <code>body</code>.
     * </p>
     * <p>
     * Concrete implementations MAY provide additional lucene document
     * fields.
     * </p>
     */
    String BODY_FIELD = "body";

    /**
     * Name of the lucene document field that holds the URI/URL of the
     * indexed document.
     * <p>
     * Concrete implementations of this interface SHOULD provide a field
     * named <code>url</code>.
     * </p>
     */
    String URL_FIELD = "url";

    /**
     * Name of the lucene document field that holds a unique key of the
     * indexed document.
     * <p>
     * This field is used internally to track document changes and
     * updates.
     * </p>
     * <p>
     * Concrete implementations of this interface SHOULD provide a field
     * named <code>uid</code>.
     * </p>
     */
    String UID_FIELD = "uid";

    /**
     * Builds lucene documents from the content of a URL.
     * <p>
     * An implementation reads the content of the given URL and generates
     * one or more lucene documents from it.
     * </p>
     *
     * @param url the URL whose content gets indexed
     * @return a list that is expected to hold the lucene documents
     *         generated for the URL; the element type is not fixed by
     *         this interface
     * @throws ProcessingException if the documents cannot be built; the
     *         precise conditions are left to the implementation
     */
    List build(URL url) throws ProcessingException;
}