/* * Copyright 2010 The Apache Software Foundation * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.bizosys.hsearch.index; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import com.bizosys.hsearch.common.HDocument; import com.bizosys.hsearch.inpipe.ComputeTokens; import com.bizosys.hsearch.inpipe.DeleteFromDictionary; import com.bizosys.hsearch.inpipe.DeleteFromIndex; import com.bizosys.hsearch.inpipe.DeleteFromPreviewAndDetail; import com.bizosys.hsearch.inpipe.FilterDuplicateId; import com.bizosys.hsearch.inpipe.FilterLowercase; import com.bizosys.hsearch.inpipe.FilterStem; import com.bizosys.hsearch.inpipe.FilterStopwords; import com.bizosys.hsearch.inpipe.FilterTermLength; import com.bizosys.hsearch.inpipe.SaveToDetail; import com.bizosys.hsearch.inpipe.SaveToDictionary; import com.bizosys.hsearch.inpipe.SaveToIndex; import com.bizosys.hsearch.inpipe.SaveToPreview; import com.bizosys.hsearch.inpipe.TokenizeStandard; import com.bizosys.oneline.ApplicationFault; import com.bizosys.oneline.SystemFault; import com.bizosys.oneline.conf.Configuration; import com.bizosys.oneline.pipes.PipeIn; import com.bizosys.oneline.util.StringUtils; /** * Performs write operation on HSearch index * @author karan * */ public class IndexWriter { private static IndexWriter singleton = null; public static IndexWriter getInstance() { if ( null != singleton) return singleton; synchronized (IndexWriter.class) { if ( null != singleton) return singleton; singleton = new IndexWriter(); } return singleton; } private Map<String, PipeIn> writePipes = null; /** * Initializes the standard pipes * Default private constructor */ private IndexWriter() { } public void init(Configuration conf) throws SystemFault, ApplicationFault{ if ( null == writePipes) createPipes(); for (PipeIn pipe: writePipes.values()) { pipe.init(conf); } } /** * Creates standard sets of pipes */ private void createPipes() { if ( null != this.writePipes) return; this.writePipes = new HashMap<String, PipeIn>(); FilterDuplicateId fdi = new FilterDuplicateId(); this.writePipes.put(fdi.getName(), fdi); TokenizeStandard ts = new TokenizeStandard(); this.writePipes.put(ts.getName(), ts); FilterStopwords fs = new FilterStopwords(); this.writePipes.put(fs.getName(), fs); FilterTermLength ftl = new FilterTermLength(); this.writePipes.put(ftl.getName(), ftl); FilterLowercase flc = new FilterLowercase(); this.writePipes.put(flc.getName(), flc); FilterStem fstem = new FilterStem(); this.writePipes.put(fstem.getName(), fstem); ComputeTokens ct = new ComputeTokens(); this.writePipes.put(ct.getName(), ct); SaveToIndex si = new SaveToIndex(); this.writePipes.put(si.getName(), si); SaveToDictionary sd = new SaveToDictionary(); this.writePipes.put(sd.getName(), sd); SaveToPreview stp = new SaveToPreview(); this.writePipes.put(stp.getName(), stp); SaveToDetail std = new SaveToDetail(); this.writePipes.put(std.getName(), std); DeleteFromIndex dfi = new DeleteFromIndex(); this.writePipes.put(dfi.getName(), dfi); DeleteFromPreviewAndDetail dfpd = new DeleteFromPreviewAndDetail(); this.writePipes.put(dfpd.getName(), dfpd); DeleteFromDictionary dfd = new DeleteFromDictionary(); this.writePipes.put(dfd.getName(), dfd); } public List<PipeIn> getPipes(String stepNames) throws SystemFault { IndexLog.l.debug("IndexWriter: getPipes = " + stepNames); if ( null == this.writePipes) createPipes(); String[] steps = StringUtils.getStrings(stepNames, ","); List<PipeIn> anvils = new ArrayList<PipeIn>(steps.length); for (String step : steps) { PipeIn aPipe = writePipes.get(step).getInstance(); if ( null == aPipe) { IndexLog.l.error("IndexWriter: getPipes Pipe not found = " + step); throw new SystemFault("Pipe Not Found: " + step); } anvils.add(aPipe); } return anvils; } /** * Following pipes are included in the standard write channel * * FilterDuplicateId,TokenizeStandard,FilterStopwords, * FilterTermLength,FilterLowercase,FilterStem,ComputeTokens, * SaveToIndex,SaveToDictionary,SaveToPreview,SaveToDetail * @return List of pipes */ public List<PipeIn> getStandardPipes() throws SystemFault { if ( null == this.writePipes) createPipes(); return getPipes( "FilterDuplicateId,TokenizeStandard,FilterStopwords,"+ "FilterTermLength,FilterLowercase,FilterStem,ComputeTokens," + "SaveToIndex,SaveToDictionary,SaveToPreview,SaveToDetail"); } /** * Insert one document applying the standard pipes * @param hdoc * @throws ApplicationFault * @throws SystemFault */ public void insert(HDocument hdoc) throws ApplicationFault, SystemFault{ List<PipeIn> localPipes = getStandardPipes(); insert(hdoc,localPipes); } /** * Insert a document with custom pipeline * @param hdoc * @param localPipes * @throws ApplicationFault * @throws SystemFault */ public void insert(HDocument hdoc, List<PipeIn> localPipes) throws ApplicationFault, SystemFault{ Doc doc = new Doc(hdoc); IndexLog.l.info("Insert Step 1 > Value parsing is over."); for (PipeIn in : localPipes) { IndexLog.l.debug("IndexWriter.insert.visitting : " + in.getName()); in.visit(doc); } IndexLog.l.info("Insert Step 2 > Pipe processing is over."); for (PipeIn in : localPipes) { IndexLog.l.debug("IndexWriter.insert.comitting :" + in.getName()); in.commit(); } IndexLog.l.info("Insert Step 3 > Commit is over."); } /** * Insert bunch of documents with standard pipelines * @param hdocs * @throws ApplicationFault * @throws SystemFault */ public void insert(List<HDocument> hdocs) throws ApplicationFault, SystemFault{ List<PipeIn> localPipes = getStandardPipes(); insert(hdocs,localPipes); } /** * Insert bunch of documents with custom pipeline * @param hdocs * @param pipes * @throws ApplicationFault * @throws SystemFault */ public void insert(List<HDocument> hdocs, List<PipeIn> pipes) throws ApplicationFault, SystemFault{ if ( null == hdocs) return; List<Doc> docs = new ArrayList<Doc>(hdocs.size()); for (HDocument hdoc : hdocs) { Doc doc = new Doc(hdoc); docs.add(doc); } IndexLog.l.info("Insert Step 1 > Value parsing is over."); for (Doc doc : docs) { for (PipeIn in : pipes) { IndexLog.l.debug("IndexWriter.insert.visitting : " + in.getName()); in.visit(doc); } } IndexLog.l.info("Insert Step 2 > Pipe processing is over."); for (PipeIn in : pipes) { IndexLog.l.debug("IndexWriter.insert.comitting :" + in.getName()); in.commit(); } IndexLog.l.info("Insert Step 3 > Commit is over."); } /** * 1 : Load the original document * 2 : Parse the document * 2 : Remove From Dictionry, Index, Preview and Detail */ public boolean delete(String documentId) throws ApplicationFault, SystemFault { IndexLog.l.info("IndexWriter.delete : " + documentId ); Doc origDoc = IndexReader.getInstance().get(documentId); if ( null == origDoc.teaser) return false; if ( null != origDoc.content) { if ( null != origDoc.content.stored ) { origDoc.content.analyzedIndexed = origDoc.content.stored; } } List<PipeIn> deletePipe = getPipes( "TokenizeStandard,FilterStopwords,FilterTermLength," + "FilterLowercase,FilterStem,ComputeTokens," + "DeleteFromIndex,DeleteFromPreviewAndDetail,DeleteFromDictionary"); IndexLog.l.info("Delete Step 1 > Value parsing is over."); for (PipeIn in : deletePipe) { IndexLog.l.debug("IndexWriter.delete.visit : " + in.getName()); in.visit(origDoc); } IndexLog.l.info("Delete Step 2 > Pipe processing is over."); for (PipeIn in : deletePipe) { IndexLog.l.debug("IndexWriter.delete.commit : " + in.getName()); in.commit(); } IndexLog.l.info("Delete Step 3 > Commit is over."); return true; } }