/* * Copyright 2010 Bizosys Technologies Limited * * Licensed to the Bizosys Technologies Limited (Bizosys) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The Bizosys licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.bizosys.hsearch.inpipe; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; import com.bizosys.hsearch.filter.Storable; import com.bizosys.hsearch.hbase.HWriter; import com.bizosys.hsearch.hbase.IUpdatePipe; import com.bizosys.hsearch.index.Doc; import com.bizosys.hsearch.index.IdMapping; import com.bizosys.hsearch.index.Term; import com.bizosys.hsearch.schema.EnglishMap; import com.bizosys.hsearch.util.ObjectFactory; import com.bizosys.oneline.ApplicationFault; import com.bizosys.oneline.SystemFault; import com.bizosys.oneline.conf.Configuration; import com.bizosys.oneline.pipes.PipeIn; import com.bizosys.oneline.util.StringUtils; /** * Delete terms from the inverted index. * @author karan * */ public class DeleteFromIndex implements PipeIn { private static final boolean DEBUG_ENABLED = InpipeLog.l.isDebugEnabled(); List<Doc> documents = null; boolean persistId = true; public DeleteFromIndex() { } public DeleteFromIndex(boolean persistId) { this.persistId = persistId; } public void visit(Object objDoc, boolean multiWriter) throws ApplicationFault, SystemFault { if ( null == objDoc) throw new ApplicationFault("No document"); if ( null == documents) documents = ObjectFactory.getInstance().getDocumentList(); documents.add((Doc)objDoc); } /** * Cuts out section of docpositions which are in the removal list. */ public void commit(boolean multiWriter) throws ApplicationFault, SystemFault { if ( null == this.documents) return; Doc curDoc = null; Set<Long> uniqueBuckets = null; List<Short> docPositions = null; try { Map<Character,StringBuilder> tables = new HashMap<Character,StringBuilder>(); EnglishMap map = new EnglishMap(); uniqueBuckets = ObjectFactory.getInstance().getLongSet(); docPositions = ObjectFactory.getInstance().getShortList(); this.populateUniqueBuckets(uniqueBuckets); for (long bucket : uniqueBuckets) { docPositions.clear(); tables.clear(); for (Doc aDoc : documents) { curDoc = aDoc; if ( bucket != aDoc.bucketId) continue; if ( null == aDoc.docSerialId) { InpipeLog.l.warn("DeleteFromIndex:commit() Skipping Found Null SerialId" + aDoc.toString()); continue; } docPositions.add(aDoc.docSerialId); if ( null == aDoc.terms.all) { InpipeLog.l.warn("No terms Found" + curDoc.toString()); continue; } buildIndexFields(aDoc, tables, map); } IUpdatePipe pipe = new DeleteFromIndexWithCut(docPositions); byte[] pk = Storable.putLong(bucket); for (Character c : tables.keySet()) { String t = c.toString(); String strFamilies = tables.get(c).toString(); char[] charFamilies = strFamilies.toCharArray(); byte[][] families = new byte[charFamilies.length][]; for ( int i=0; i<charFamilies.length; i++) { families[i] = new byte[] { (byte) charFamilies[i]}; } if ( DEBUG_ENABLED ) InpipeLog.l.debug( "DeleteFromIndex> Deleting table " + t + " families " + strFamilies); HWriter.getInstance(multiWriter).update(t, pk, pipe, families); } } // Delete the mapping too.. if ( this.persistId ) { for (Doc aDoc : documents) { IdMapping.delete(aDoc.tenant, aDoc.teaser.id, multiWriter); } } } catch (Exception ex) { if ( null != curDoc) InpipeLog.l.error(curDoc.toString(), ex); else InpipeLog.l.error(ex); throw new SystemFault(ex); } finally { if ( null != this.documents) ObjectFactory.getInstance().putDocumentList(this.documents); if ( null != uniqueBuckets) ObjectFactory.getInstance().putLongSet(uniqueBuckets); } } private void buildIndexFields(Doc curDoc, Map<Character, StringBuilder> tables, EnglishMap map) { /** * Build table and family based on terms */ for (Term aTerm : curDoc.terms.all) { if ( StringUtils.isEmpty(aTerm.term)) continue; char table = map.getTableName(aTerm.term); char family = map.getColumnFamily(aTerm.term); //char col = map.getColumn(aTerm.term); Not needed to fetch the row. if ( tables.containsKey(table)) { StringBuilder sb = tables.get(table); if ( -1 == sb.toString().indexOf(family) ) sb.append(family); } else { StringBuilder sb = new StringBuilder(); sb.append(family); tables.put(table, sb); } } } private void populateUniqueBuckets(Set<Long> uniqueBuckets) throws ApplicationFault { if ( null == this.documents) return; for (Doc doc : this.documents) { if ( null == doc) continue; uniqueBuckets.add(doc.bucketId); } } public void init(Configuration conf){ this.persistId = conf.getBoolean("idmapping.enable", false); } public PipeIn getInstance() { return new DeleteFromIndex(this.persistId); } public String getName() { return "DeleteFromIndex"; } }