/*
 * Copyright 2010 Bizosys Technologies Limited
 *
 * Licensed to the Bizosys Technologies Limited (Bizosys) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The Bizosys licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.bizosys.hsearch.index;

import java.util.HashMap;
import java.util.List;
import java.util.Map;

import com.bizosys.hsearch.common.Account;
import com.bizosys.hsearch.filter.IStorable;
import com.bizosys.hsearch.filter.Storable;
import com.bizosys.hsearch.hbase.HReader;
import com.bizosys.hsearch.hbase.HWriter;
import com.bizosys.hsearch.hbase.HbaseLog;
import com.bizosys.hsearch.hbase.NV;
import com.bizosys.hsearch.hbase.NVBytes;
import com.bizosys.hsearch.schema.ILanguageMap;
import com.bizosys.hsearch.util.ObjectFactory;
import com.bizosys.hsearch.util.Record;
import com.bizosys.oneline.SystemFault;
import com.bizosys.oneline.util.StringUtils;

/**
 * Multiple term families grouped inside a TermTables container.
 * @author karan
 */
public class TermTables {

    private static final int NV_FLUSH_LIMIT = 256;

    static { Account.init(); }

    public IStorable bucketId = null;
    public Map<Character, TermFamilies> tables = null;
    private boolean threadSafe = true;

    public TermTables(boolean concurrency) {
        this.threadSafe = concurrency;
    }

    public TermTables(IStorable bucketId, boolean concurrency) {
        this.bucketId = bucketId;
        this.threadSafe = concurrency;
    }

    /**
     * Adds a term to the table and family block resolved by the language map.
     */
    public void add(Term aTerm, ILanguageMap lang) {
        if (null == tables) tables = new HashMap<Character, TermFamilies>();
        if (StringUtils.isEmpty(aTerm.term)) return;

        Character table = lang.getTableName(aTerm.term);
        TermFamilies block = null;
        if (tables.containsKey(table)) {
            block = tables.get(table);
        } else {
            block = new TermFamilies();
            tables.put(table, block);
        }
        block.add(aTerm, lang);
    }

    /**
     * Merges another TermTables into this one when both share the same bucket.
     * @return false if the other bucket id is absent or the bucket ids differ
     */
    public boolean add(TermTables another) {
        if (null == another.bucketId) return false;
        byte[] anotherPK = another.bucketId.toBytes();
        if (!Storable.compareBytes(this.bucketId.toBytes(), anotherPK)) return false;

        // Both belong to the same bucket zone.
        for (Character otherTable : another.tables.keySet()) {
            TermFamilies otherFamilies = another.tables.get(otherTable);
            if (this.tables.containsKey(otherTable)) {
                TermFamilies thisFamilies = this.tables.get(otherTable);
                thisFamilies.add(otherFamilies);
            } else {
                this.tables.put(otherTable, otherFamilies);
            }
        }
        return true;
    }

    /**
     * Merges another TermTables that is already known to belong to the same bucket zone.
     * @param another the TermTables to merge in
     */
    public void addInSameBucket(TermTables another) {
        for (Character otherTable : another.tables.keySet()) {
            TermFamilies otherFamilies = another.tables.get(otherTable);
            if (this.tables.containsKey(otherTable)) {
                TermFamilies thisFamilies = this.tables.get(otherTable);
                thisFamilies.add(otherFamilies);
            } else {
                this.tables.put(otherTable, otherFamilies);
            }
        }
    }

    public void assignDocumentPosition(int docPos) {
        if (null == tables) return;
        for (TermFamilies tf : tables.values()) {
            if (null == tf) continue;
            tf.assignDocumentPosition(docPos);
        }
    }

    public void persist(boolean merge, boolean newBucket) throws SystemFault {
        TermsBlockRecord tbr = null;
        try {
            for (Character tableName : tables.keySet()) {
                if (newBucket) {
                    // New bucket: write name-value pairs directly, flushing in batches.
                    List<NV> nvs = ObjectFactory.getInstance().getNVList();
                    TermFamilies termFamilies = tables.get(tableName);
                    int nvT = 0;
                    for (char fam : termFamilies.families.keySet()) {
                        TermColumns tc = termFamilies.families.get(fam);
                        tc.toNVs(nvs);
                        nvT = nvs.size();
                        if (nvT > NV_FLUSH_LIMIT) {
                            if (IndexLog.l.isInfoEnabled()) IndexLog.l.info("Total records size :" + nvT);
                            Record record = new Record(bucketId, nvs);
                            HWriter.getInstance(threadSafe).insert(tableName.toString(), record);
                            nvs.clear();
                        }
                    }
                    if (nvs.size() > 0) {
                        Record record = new Record(bucketId, nvs);
                        HWriter.getInstance(threadSafe).insert(tableName.toString(), record);
                        nvs.clear();
                    }
                    ObjectFactory.getInstance().putNVList(nvs);
                } else {
                    // Existing bucket: merge the term families into the stored block.
                    TermFamilies termFamilies = tables.get(tableName);
                    tbr = new TermsBlockRecord(bucketId);
                    tbr.setTermFamilies(termFamilies);
                    if (HbaseLog.l.isDebugEnabled())
                        HbaseLog.l.debug("TermTables.persist Table " + tableName + tbr.toString());
                    HWriter.getInstance(threadSafe).merge(tableName.toString(), tbr);
                    tbr.cleanup();
                }
            }
        } catch (Exception ex) {
            if (null != tbr) tbr.cleanup();
            throw new SystemFault(ex);
        }
    }

    /**
     * Populates the existing stored values for this bucket into the given term families.
     * @param tableName the table to read the bucket row from
     * @param termFamilies the families whose term lists receive the existing bytes
     * @throws SystemFault on read failure
     */
    public void setExistingValue(String tableName, TermFamilies termFamilies) throws SystemFault {
        List<NVBytes> existingB = HReader.getCompleteRow(tableName, bucketId.toBytes());
        if (null == existingB) return;

        for (char family : termFamilies.families.keySet()) {
            TermColumns cols = termFamilies.families.get(family);
            for (char col : cols.columns.keySet()) {
                TermList terms = cols.columns.get(col);
                for (NVBytes bytes : existingB) {
                    if (bytes.family[0] == family && bytes.name[0] == col) {
                        terms.setExistingBytes(bytes.data);
                        break;
                    }
                }
            }
        }
    }

    /**
     * Calculates the total number of terms inside the container.
     * @return total number of terms present
     */
    public int getTableSize() {
        if (null == this.tables) return 0;
        int totalsize = 0;
        for (TermFamilies tf : this.tables.values()) {
            if (null == tf.families) continue;
            for (TermColumns tcs : tf.families.values()) {
                if (null == tcs.columns) continue;
                for (TermList tl : tcs.columns.values()) {
                    if (null == tl.lstKeywords) continue;
                    for (List<Term> terms : tl.lstKeywords.values()) {
                        totalsize = totalsize + terms.size();
                    }
                }
            }
        }
        return totalsize;
    }

    public void cleanup() {
        if (null == tables) return;
        for (TermFamilies families : tables.values()) {
            families.cleanup();
        }
        tables.clear();
    }
}
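
/*
 * Illustrative usage sketch (not part of the original source). Assuming a Term
 * instance, an ILanguageMap implementation, an IStorable bucket id, and a
 * document position are already available from the surrounding indexing code,
 * a TermTables container might be populated and flushed along these lines:
 *
 *     TermTables termTables = new TermTables(bucketId, true); // concurrency flag picks the thread-safe HWriter
 *     termTables.add(aTerm, languageMap);          // route the term to its table/family block
 *     termTables.assignDocumentPosition(docPos);   // propagate the document position to all term families
 *     termTables.persist(false, true);             // newBucket=true: insert fresh rows rather than merge
 *     termTables.cleanup();                        // clear the accumulated term families
 */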