/*
 * Copyright 2010 Bizosys Technologies Limited
 *
 * Licensed to the Bizosys Technologies Limited (Bizosys) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The Bizosys licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.bizosys.hsearch.schema;

import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.io.hfile.Compression;
import org.apache.hadoop.hbase.regionserver.StoreFile;

import com.bizosys.hsearch.hbase.HDML;
import com.bizosys.oneline.SystemFault;
import com.bizosys.oneline.conf.Configuration;
import com.bizosys.oneline.services.Request;
import com.bizosys.oneline.services.Response;
import com.bizosys.oneline.services.Service;
import com.bizosys.oneline.services.ServiceMetaData;

/**
 * It is a facade for schema related operations.
 * This schema creates in pristine mode 154 directories and 378 files.
 *
 * <p>{@link #init(Configuration, ServiceMetaData)} reads the per-table
 * compression settings from the configuration and then asks {@link HDML}
 * to create the preview, content, id-mapping, inverted-index, config and
 * dictionary tables.
 *
 * @author karan
 */
public class SchemaManager implements Service {

    /** Name of the "no compression" algorithm as HBase spells it. */
    private static final String NO_COMPRESSION =
        Compression.Algorithm.NONE.getName();

    /**
     * Lazily created shared instance. Declared {@code volatile} because it
     * is read outside the lock in {@link #getInstance()}; without it the
     * double-checked initialization is not safely published.
     */
    private static volatile SchemaManager instance = null;

    /**
     * Returns the shared instance, creating it on first use.
     *
     * @return the process-wide {@code SchemaManager}
     */
    public static final SchemaManager getInstance() {
        SchemaManager current = instance;
        if (null != current) return current;
        synchronized (SchemaManager.class) {
            if (null == instance) instance = new SchemaManager();
            return instance;
        }
    }

    /**
     * Default constructor
     */
    public SchemaManager() {
    }

    /* Settings of the "search" column family of the preview table. */
    private String searchCompression = NO_COMPRESSION;
    private boolean searchBlockCache = true;
    private int searchBlockSize = HColumnDescriptor.DEFAULT_BLOCKSIZE;
    private String searchBloomFilter = StoreFile.BloomType.ROWCOL.toString();
    private int searchRepMode = HConstants.REPLICATION_SCOPE_GLOBAL;

    /* Settings of the "teaser" column family of the preview table. */
    private String teaserCompression = NO_COMPRESSION;
    private boolean teaserBlockCache = true;
    private int teaserBlockSize = HColumnDescriptor.DEFAULT_BLOCKSIZE;
    private String teaserBloomFilter = StoreFile.BloomType.NONE.toString();
    private int teaserRepMode = HConstants.REPLICATION_SCOPE_GLOBAL;

    /* Settings shared by both column families of the content table. */
    private String contentCompression = Compression.Algorithm.GZ.getName();
    private boolean contentBlockCache = false;
    private int contentBlockSize = HColumnDescriptor.DEFAULT_BLOCKSIZE;
    private String contentBloomFilter = StoreFile.BloomType.NONE.toString();
    private int contentRepMode = HConstants.REPLICATION_SCOPE_GLOBAL;

    /* Settings of the id-mapping table. */
    private String idMapCompression = NO_COMPRESSION;
    private boolean idMapBlockCache = false;
    private int idMapBlockSize = HColumnDescriptor.DEFAULT_BLOCKSIZE;
    private String idMapBloomFilter = StoreFile.BloomType.NONE.toString();
    private int idMapRepMode = HConstants.REPLICATION_SCOPE_GLOBAL;

    /* Settings of the inverted-index tables. */
    private String invertCompression = NO_COMPRESSION;
    private boolean invertBlockCache = false;
    private int invertBlockSize = HColumnDescriptor.DEFAULT_BLOCKSIZE;
    private String invertBloomFilter = StoreFile.BloomType.NONE.toString();
    private int invertRepMode = HConstants.REPLICATION_SCOPE_GLOBAL;

    /**
     * Checks and Creates all necessary tables required for HSearch index.
     *
     * <p>The compression of each table group is taken from the keys
     * {@code meta.compression}, {@code teaser.compression},
     * {@code content.compression}, {@code idmapping.compression}
     * (all defaulting to {@code gz}) and {@code invertindex.compression}
     * (defaulting to {@code none}).
     *
     * @param conf configuration the settings are read from
     * @param meta service meta data; not used by this implementation
     * @return {@code true} when every creation step completed without an
     *         exception, {@code false} otherwise (the failure is logged)
     */
    public boolean init(Configuration conf, ServiceMetaData meta) {
        this.searchCompression =
            resolveCompression(conf.get("meta.compression", "gz"));
        this.teaserCompression =
            resolveCompression(conf.get("teaser.compression", "gz"));
        this.contentCompression =
            resolveCompression(conf.get("content.compression", "gz"));
        this.idMapCompression =
            resolveCompression(conf.get("idmapping.compression", "gz"));
        this.invertCompression =
            resolveCompression(conf.get("invertindex.compression", "none"));

        try {
            SchemaLog.l.info("Creating Preview Table");
            createPreview(conf);

            SchemaLog.l.info("Creating Content Table");
            createContent(conf);

            SchemaLog.l.info("Creating IdMapping Table");
            createIdMap(conf);

            SchemaLog.l.info("Creating Invert Table");
            createInvert(conf);

            SchemaLog.l.info("Creating Config Table");
            createConfigs(conf);

            SchemaLog.l.info("Creating Dictionary Table");
            createDictionary(conf);

            return true;
        } catch (Exception sf) {
            // Kept so that a command-line run (see main) still shows the
            // failure even when no log appender is configured.
            sf.printStackTrace(System.err);
            // Pass the exception as the throwable argument so the logger
            // records the stack trace and not only its toString().
            SchemaLog.l.fatal("Schema initialization failed", sf);
            return false;
        }
    }

    /**
     * Creates the preview table.
     *
     * Column Family : Search (META, SOCIAL, BUCKET)
     *               : Teaser (ID, URL, TITLE, CACHE, PREVIEW)
     *
     * The search family keeps {@code record.revision} versions (default 1);
     * the teaser family always keeps a single version.
     */
    private void createPreview(Configuration conf) throws SystemFault {
        int rev = conf.getInt("record.revision", 1);

        HColumnDescriptor search = family(IOConstants.SEARCH_BYTES, rev,
            searchCompression, searchBlockCache, searchBlockSize,
            searchBloomFilter, searchRepMode);

        HColumnDescriptor teaser = family(IOConstants.TEASER_BYTES, 1,
            teaserCompression, teaserBlockCache, teaserBlockSize,
            teaserBloomFilter, teaserRepMode);

        HDML.create(IOConstants.TABLE_PREVIEW, families(search, teaser));
    }

    /**
     * Creates the content table.
     *
     * Column Family : Body (FIELDS)
     *               : CITATION ( CITATION_FROM, CITATION_TO )
     *
     * Both families keep {@code record.revision} versions (default 1).
     */
    private void createContent(Configuration conf) throws SystemFault {
        int rev = conf.getInt("record.revision", 1);

        HColumnDescriptor fields = family(IOConstants.CONTENT_FIELDS_BYTES,
            rev, contentCompression, contentBlockCache, contentBlockSize,
            contentBloomFilter, contentRepMode);

        HColumnDescriptor citation = family(
            IOConstants.CONTENT_CITATION_BYTES, rev, contentCompression,
            contentBlockCache, contentBlockSize, contentBloomFilter,
            contentRepMode);

        HDML.create(IOConstants.TABLE_CONTENT, families(fields, citation));
    }

    /**
     * Creates the id-mapping table.
     *
     * emp123(ori document id) = b1 (bucket id), 2343(Bucket doc Serial No)
     * The mapped document id = b1_2343 (This is Unique ID)
     *
     * @throws SystemFault
     */
    private void createIdMap(Configuration conf) throws SystemFault {
        HColumnDescriptor mapping = family(IOConstants.NAME_VALUE_BYTES, 1,
            idMapCompression, idMapBlockCache, idMapBlockSize,
            idMapBloomFilter, idMapRepMode);

        HDML.create(IOConstants.TABLE_IDMAP, families(mapping));
    }

    /**
     * Creates the inverted-index tables: one table per character of
     * {@link ILanguageMap#ALL_COLS}, each holding one column family per
     * character of {@link ILanguageMap#ALL_FAMS}.
     *
     * Example term: ABINASH
     * Table 'A' = First Character Map,
     * Column Family 7 = Term Length,
     * Column H = Last Character
     * ID = Bucket Id
     *    keyword_hash,
     *    keyword
     *    total docs(#)
     *    [doc type 1, doc type 2, ..., doc type n] *
     *    [term type 1, term type 2, ..., term type n] *
     *    [term weight 1, term weight 2, ..., term weight n] *
     *    [term first pos 1, term first pos 2, ..., term pos n] *
     *    [Bucket doc Serial ID 1, Bucket doc Serial ID 2, ..., n] *
     */
    private void createInvert(Configuration conf) throws SystemFault {
        for (char tableChar : ILanguageMap.ALL_COLS) {
            // A fresh list per table, so a list already handed to
            // HDML.create is never modified afterwards.
            List<HColumnDescriptor> colFamilies =
                new ArrayList<HColumnDescriptor>();
            for (char familyChar : ILanguageMap.ALL_FAMS) {
                colFamilies.add(family(new byte[] {(byte) familyChar}, 1,
                    invertCompression, invertBlockCache, invertBlockSize,
                    invertBloomFilter, invertRepMode));
            }

            boolean isCreated =
                HDML.create(String.valueOf(tableChar), colFamilies);
            // NOTE(review): the remaining tables are skipped as soon as one
            // create call reports false - presumably because the schema
            // already exists; confirm against HDML.create.
            if (!isCreated) break;
        }
    }

    /**
     * Creates the configuration table: a single uncompressed, block-cached
     * name/value column family keeping one version.
     */
    private void createConfigs(Configuration conf) throws SystemFault {
        HColumnDescriptor config = family(IOConstants.NAME_VALUE_BYTES, 1,
            NO_COMPRESSION, true, HColumnDescriptor.DEFAULT_BLOCKSIZE,
            StoreFile.BloomType.NONE.toString(),
            HConstants.REPLICATION_SCOPE_GLOBAL);

        HDML.create(IOConstants.TABLE_CONFIG, families(config));
    }

    /**
     * Creates the dictionary table: a single uncompressed, block-cached
     * column family keeping one version.
     */
    private void createDictionary(Configuration conf) throws SystemFault {
        HColumnDescriptor dict = family(IOConstants.DICTIONARY_BYTES, 1,
            NO_COMPRESSION, true, HColumnDescriptor.DEFAULT_BLOCKSIZE,
            StoreFile.BloomType.NONE.toString(),
            HConstants.REPLICATION_SCOPE_GLOBAL);

        HDML.create(IOConstants.TABLE_DICTIONARY, families(dict));
    }

    /**
     * Builds a column family descriptor with the two settings every table
     * of this schema shares: the fourth constructor argument is always
     * {@code false} (the in-memory flag in the HBase API - confirm against
     * the HBase version in use) and the time-to-live is
     * {@link HConstants#FOREVER}.
     */
    private static HColumnDescriptor family(byte[] name, int maxVersions,
            String compression, boolean blockCache, int blockSize,
            String bloomFilter, int replicationScope) {
        return new HColumnDescriptor(name, maxVersions, compression,
            false, blockCache, blockSize, HConstants.FOREVER,
            bloomFilter, replicationScope);
    }

    /** Collects the given descriptors, in order, into a mutable list. */
    private static List<HColumnDescriptor> families(
            HColumnDescriptor... descriptors) {
        List<HColumnDescriptor> colFamilies =
            new ArrayList<HColumnDescriptor>(descriptors.length);
        for (HColumnDescriptor descriptor : descriptors) {
            colFamilies.add(descriptor);
        }
        return colFamilies;
    }

    /**
     * Initializes a language mapping class for a given locale.
     * Only English is supported; the match is made on the locale's
     * language code, so any English locale (en, en_US, ...) qualifies.
     *
     * @param l the locale a mapping is requested for
     * @return Language specific schema mapping for a given Locale
     * @throws SystemFault when the locale is null or not supported
     */
    public ILanguageMap getLanguageMap(Locale l) throws SystemFault {
        if (null == l) {
            throw new SystemFault("null is not supported yet.");
        }
        if (Locale.ENGLISH.getLanguage().equals(l.getLanguage())) {
            return new EnglishMap();
        }
        throw new SystemFault(l.toString() + " is not supported yet.");
    }

    /**
     * @return the service name, {@code "Schema"}
     */
    public String getName() {
        return "Schema";
    }

    /** Does nothing; this implementation handles no requests. */
    public void process(Request arg0, Response arg1) {
    }

    /** Does nothing. */
    public void stop() {
    }

    /**
     * Compression method to HBase compression code.
     * Matching ignores case and surrounding whitespace. A null or
     * unrecognized name falls back to GZ; an unrecognized name is
     * additionally reported as a warning.
     *
     * @param methodName one of {@code gz}, {@code lzo} or {@code none}
     * @return the name of the matching HBase compression algorithm
     */
    private static String resolveCompression(String methodName) {
        String normalized = (null == methodName)
            ? "" : methodName.trim().toLowerCase(Locale.ENGLISH);

        if ("lzo".equals(normalized)) {
            return Compression.Algorithm.LZO.getName();
        }
        if ("none".equals(normalized)) {
            return Compression.Algorithm.NONE.getName();
        }
        if (!"gz".equals(normalized)) {
            SchemaLog.l.warn("Unknown compression method [" + methodName
                + "], falling back to gz");
        }
        return Compression.Algorithm.GZ.getName();
    }

    /**
     * Command-line entry point: creates the schema using a default
     * {@link Configuration}.
     */
    public static void main(String[] args) throws Exception {
        new SchemaManager().init(new Configuration(), null);
    }
}