/* * Copyright 2010 Bizosys Technologies Limited * * Licensed to the Bizosys Technologies Limited (Bizosys) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The Bizosys licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.bizosys.hsearch.dictionary; import java.io.IOException; import java.io.Writer; import java.util.Collection; import java.util.Date; import java.util.Hashtable; import java.util.List; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import com.bizosys.hsearch.hbase.HDML; import com.bizosys.hsearch.hbase.NV; import com.bizosys.hsearch.schema.IOConstants; import com.bizosys.oneline.ApplicationFault; import com.bizosys.oneline.SystemFault; import com.bizosys.oneline.conf.Configuration; import com.bizosys.oneline.services.Request; import com.bizosys.oneline.services.Response; import com.bizosys.oneline.services.Service; import com.bizosys.oneline.services.ServiceMetaData; import com.bizosys.oneline.services.scheduler.ExpressionBuilder; import com.bizosys.oneline.services.scheduler.ScheduleTask; import com.bizosys.oneline.util.StringUtils; /** * This is the Facade for the dictionry service. It's responsible for * initializing dictionry service as well as serving clients. This is the * single entry point for outside clients to perform dictionry operations. * @author karan * */ public class DictionaryManager implements Service{ /** * Dictionry Refresh Task. In a multi machine environment, this keeps * loading the dictionry to in memory locally in interval to ensure * fuzzy and regex searches. */ ScheduleTask scheduledRefresh = null; /** * The dictionry repositories for the tenants * Once loaded, they are cached * TODO: Do maintenance work at interval. * TODO: Migrate these to a caching framework which works on a given fixed memory. */ Map<String, Dictionary> dictRepos = new ConcurrentHashMap<String, Dictionary>(); int mergeCount = 1000; int pageSize = 1000; boolean isSpellChecked = true; boolean concurrency = true; int maxCacheSize = 10; int fuzzyLevel = 2; /** * Singleton */ private static DictionaryManager instance = null; /** * Constructor is private. This ensures singleton * @return DictionaryManager * @throws SystemFault */ public static final DictionaryManager getInstance() throws SystemFault { if ( null == instance) throw new SystemFault( "DisctionaryManager is not initialized"); return instance; } /** * Default Constructor * Needs to be initialized only once. Done by ServiceFacade. */ public DictionaryManager() { instance = this; } /** * Service name - Dictionarymanager */ public String getName() { return "Dictionarymanager"; } /** * Launches dictionry refresh task and initializes the dictionry. */ public boolean init(Configuration conf, ServiceMetaData arg1) { DictLog.l.info("Initializing Dictionary Service"); DictionaryRefresh refreshTask = new DictionaryRefresh(); try { this.mergeCount = conf.getInt("dictionary.merge.words", 1000); this.pageSize = conf.getInt("dictionary.page.Size", 1000); this.concurrency = conf.getBoolean("dictionary.concurrency", true); this.isSpellChecked = conf.getBoolean("dictionary.spellcheck.enabled", true); this.maxCacheSize = conf.getInt("dictionary.cache.Size", 10); this.fuzzyLevel = conf.getInt("dictionary.fuzzy.level", 2); int refreshInteral = conf.getInt("dictionary.refresh", 30); ExpressionBuilder expr = new ExpressionBuilder(); expr.setSecond(0, false); expr.setMinute(refreshInteral, true); long startTime = new Date().getTime() + refreshInteral * 60 * 1000 /** After 10 minutes */; scheduledRefresh = new ScheduleTask(refreshTask, expr.getExpression(), new Date(startTime), new Date(Long.MAX_VALUE)); DictLog.l.info("DisctionaryManager > Dictionry Refresh task is scheduled."); return true; } catch (Exception ex) { DictLog.l.fatal("DisctionaryManager >", ex); return false; } } public void process(Request arg0, Response arg1) { } protected Map<String, Dictionary> getCachedTenants() { return dictRepos; } /** * Stop the refresh task */ public void stop() { if ( null != this.scheduledRefresh) this.scheduledRefresh.endDate = new Date(System.currentTimeMillis()); } /** * Removes the complete dictionary. * @throws SystemFault */ public void purge() throws SystemFault { try { NV kv = new NV(IOConstants.DICTIONARY_BYTES, IOConstants.DICTIONARY_TERM_BYTES); HDML.truncate(IOConstants.TABLE_DICTIONARY, kv); } catch (Exception ex) { throw new SystemFault(ex); } } /** * If cached, give from cache or load it from the database * @param tenant * @return * @throws ApplicationFault * @throws SystemFault */ public Dictionary getDictionary(String tenant) throws ApplicationFault, SystemFault { if ( this.dictRepos.containsKey(tenant)) return this.dictRepos.get(tenant); Dictionary aDict = new Dictionary(tenant, mergeCount, pageSize, concurrency); this.dictRepos.put(tenant, aDict); if ( isSpellChecked ) { DictLog.l.info("DisctionaryManager > Caching dictionary terms to memory."); aDict.buildTerms(); } return aDict; } /** * Get the first page words from the dictionary * @return List of words * @throws SystemFault */ public void getKeywords(String tenant, Writer writer) throws ApplicationFault, SystemFault { try { Dictionary aDict = getDictionary(tenant); writer.append("<words>"); aDict.getAll(StringUtils.Empty, writer); writer.append("</words>"); } catch (IOException ex) { throw new SystemFault(ex); } } /** * Get the first page words from the dictionary * @return List of words * @throws SystemFault */ public void getKeywords(String tenant, String indexLetter, Writer writer) throws ApplicationFault, SystemFault { try { Dictionary aDict = getDictionary(tenant); writer.append("<words>"); aDict.getAll(indexLetter, writer); writer.append("</words>"); } catch (IOException ex) { throw new SystemFault(ex); } } /** * Add a single entry to the dictionry * @param entry * @throws SystemFault */ public void add(String tenant, DictEntry entry) throws ApplicationFault, SystemFault { Dictionary aDict = getDictionary(tenant); Hashtable<String, DictEntry> entries = new Hashtable<String, DictEntry>(1); entries.put(entry.word, entry); aDict.add(entries); } /** * Add bunch of entries to the dictionry * @param entries * @throws SystemFault */ public void add(String tenant, Map<String, DictEntry> entries) throws ApplicationFault, SystemFault { Dictionary aDict = getDictionary(tenant); aDict.add(entries); } public void refresh(String tenant) throws ApplicationFault, SystemFault { Dictionary aDict = getDictionary(tenant); aDict.buildTerms(); } /** * Get directly the keyword * @param keyword * @return Dictionary Entry * @throws SystemFault */ public DictEntry get(String tenant, String keyword) throws ApplicationFault,SystemFault { if ( StringUtils.isEmpty(keyword)) return null; Dictionary aDict = getDictionary(tenant); return aDict.get(keyword); } /** * Check for the right spelling for the given keyword * @param keyword * @return List of matching words * @throws SystemFault */ public List<String> getSpelled(String tenant,String keyword) throws ApplicationFault,SystemFault { if ( StringUtils.isEmpty(keyword)) return null; Dictionary aDict = getDictionary(tenant); List<String> words = aDict.fuzzy(keyword, 1); if ( words.size() > 0) return words; return aDict.fuzzy(keyword, this.fuzzyLevel); } /** * Gets matching keywords for the given wildcard keyword. * @param keyword The regular expression * @return List of matching words * @throws SystemFault */ public List<String> getWildCard(String tenant, String keyword) throws ApplicationFault,SystemFault { if ( StringUtils.isEmpty(keyword)) return null; Dictionary aDict = getDictionary(tenant); return aDict.regex(keyword); } /** * This completely removes the keywords from the dictionry * @param keywords * @throws SystemFault */ public void delete(String tenant, Collection<String> keywords) throws ApplicationFault,SystemFault { if ( null == keywords) return; Dictionary aDict = getDictionary(tenant); aDict.delete(keywords); } /** * Delete a keyword from the dictionary * @param keyword The dictionary word * @throws SystemFault System Error */ public void delete(String tenant, String keyword) throws ApplicationFault,SystemFault { if ( StringUtils.isEmpty(keyword)) return; Dictionary aDict = getDictionary(tenant); aDict.delete(keyword); } /** * Once a document is removed, substract it's keywords from the dictionry * If there are more * @param entries * @throws SystemFault */ public void substract(String tenant, Map<String, DictEntry> entries) throws ApplicationFault,SystemFault { if ( null == entries) return; Dictionary aDict = getDictionary(tenant); aDict.substract(entries); } }