LsiIndexer.java example

Explorer
indextank-engine-master
/*
 * Copyright (c) 2011 LinkedIn, Inc
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package com.flaptor.indextank.index.lsi;

import java.io.IOException;

import org.apache.log4j.Logger;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;

import com.flaptor.indextank.Indexer;
import com.flaptor.indextank.index.Document;
import com.flaptor.util.Execute;
import com.google.common.base.Preconditions;

/**
 * {@link Indexer} implementation that applies document additions and deletions
 * to the Lucene {@link IndexWriter} obtained from an {@link LsiIndex}.
 *
 * <p>All public mutating methods are synchronized on this instance. I/O
 * failures reported by the writer are logged and not propagated to the caller.
 *
 * @author Flaptor Development Team
 */
public final class LsiIndexer implements Indexer {
    private static final Logger logger = Logger.getLogger(Execute.whoAmI());

    /** Name of the non-analyzed field holding the document id; it is the key for updates and deletes. */
    private static final String DOC_ID_FIELD = "documentId";

    //Lucene related variables.
    private final LsiIndex workIndex;
    private final IndexWriter writer;

    /**
     * Creates an indexer that writes through the Lucene writer of the given index.
     *
     * @param index the index to write to; must not be null.
     * @throws NullPointerException if {@code index} is null.
     */
    public LsiIndexer(LsiIndex index) {
        workIndex = Preconditions.checkNotNull(index);
        // The writer is fetched once and reused for the lifetime of this indexer.
        writer = workIndex.getLuceneIndexWriter();
    }

    /**
     * {@inheritDoc}
     *
     * <p>Every field of {@code itdoc} is indexed as analyzed, unstored text. The
     * document replaces any previously indexed document with the same
     * {@code docId}. A null {@code docId} or {@code itdoc} is logged and the
     * call is ignored. An {@link IOException} from the writer is logged and
     * swallowed.
     *
     * @param docId identifier of the document being added.
     * @param itdoc the IndexTank document whose fields are indexed.
     */
    public synchronized void add(final String docId, final Document itdoc) {
        if (null == docId) {
            logger.error("No documentId specified. Ignoring addition.");
            return;
        }
        if (null == itdoc) {
            logger.error("No document specified for docId=" + docId + ". Ignoring addition.");
            return;
        }

        org.apache.lucene.document.Document doc = asLuceneDocument(itdoc);

        // The id is indexed twice: analyzed under the payload field
        // (NOTE(review): presumably consumed by the index's analyzer -- confirm
        // in LsiIndex), and verbatim under DOC_ID_FIELD so that it can be
        // matched exactly by docIdTerm() on update and delete.
        doc.add(new Field(LsiIndex.PAYLOAD_TERM_FIELD, docId, Field.Store.NO, Field.Index.ANALYZED));
        doc.add(new Field(DOC_ID_FIELD, docId, Field.Store.NO, Field.Index.NOT_ANALYZED));

        try {
            if (logger.isDebugEnabled()) {
                logger.debug("Adding document with docId=" + docId + ". Doc is " + itdoc.getFieldNames());
            }
            // updateDocument removes any document matching the term before adding the new one.
            writer.updateDocument(docIdTerm(docId), doc);
        } catch (IOException e) {
            // Pass the exception as the throwable argument so the stack trace is logged.
            logger.error("Failed to add document with docId=" + docId, e);
        }
    }

    /**
     * {@inheritDoc}
     *
     * <p>A null {@code docId} is logged and the call is ignored. An
     * {@link IOException} from the writer is logged and swallowed.
     *
     * @param docId identifier of the document to delete.
     */
    public synchronized void del(final String docId) {
        if (null == docId) {
            logger.error("No documentId specified. Ignoring deletion.");
            return;
        }

        try {
            writer.deleteDocuments(docIdTerm(docId));
        } catch (IOException e) {
            // Pass the exception as the throwable argument so the stack trace is logged.
            logger.error("Failed to delete document with docId=" + docId, e);
        }
    }

    /**
     * Builds the term that identifies a document by its id.
     *
     * @param docId the document identifier.
     * @return a term on the document id field with {@code docId} as its text.
     */
    private static Term docIdTerm(final String docId) {
        return new Term(DOC_ID_FIELD, docId);
    }

    /**
     * Asks the underlying index to flush by delegating to {@link LsiIndex#flush()}.
     * NOTE(review): what a flush persists (commit, copy, optimization) is
     * decided by {@code LsiIndex} and is not visible from this class.
     */
    public synchronized void makeDirectoryCheckpoint() {
        workIndex.flush();
    }

    /**
     * Transforms an IndexTank document into a Lucene document.
     *
     * @param itd the IndexTank document to convert.
     * @return a Lucene document with one analyzed, unstored field for each
     *         field name reported by {@code itd}.
     */
    private static org.apache.lucene.document.Document asLuceneDocument(Document itd) {
        org.apache.lucene.document.Document doc = new org.apache.lucene.document.Document();
        for (String field : itd.getFieldNames()) {
            doc.add(new Field(field, itd.getField(field), Field.Store.NO, Field.Index.ANALYZED));
        }
        return doc;
    }
}