/* * Copyright 2010 The Apache Software Foundation * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.bizosys.hsearch.dictionary; import java.io.IOException; import java.util.ArrayList; import java.util.Date; import java.util.List; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; import com.bizosys.oneline.ApplicationFault; import com.bizosys.oneline.SystemFault; import com.bizosys.oneline.services.batch.BatchTask; import com.bizosys.hsearch.hbase.HBaseFacade; import com.bizosys.hsearch.hbase.HTableWrapper; import com.bizosys.hsearch.schema.IOConstants; /** * Loads the dictionry terms to memory in intervals. * It helps for fuzzy search and regex search. * @author karan */ public class DictionaryRefresh implements BatchTask { /** * The job name */ private String jobName = "DictionaryRefresh"; /** * Run in incremental */ private boolean isIncremental = true; /** * When the last time the Job ran */ private long lastProcessingTime = -1; public String getJobName() { return this.jobName; } public void setJobName(String jobName) { this.jobName = jobName; } public Object process() throws ApplicationFault, SystemFault{ /** * download all new changes */ long now = new Date().getTime(); Scan scan = new Scan(); scan.setCaching(300); scan.setCacheBlocks(false); scan = scan.setMaxVersions(1); scan = scan.addColumn( IOConstants.DICTIONARY_BYTES,IOConstants.DICTIONARY_TERM_BYTES); try { if ( -1 != lastProcessingTime && isIncremental ) { scan = scan.setTimeRange(lastProcessingTime, now); } HTableWrapper table = HBaseFacade.getInstance().getTable(IOConstants.TABLE_DICTIONARY); ResultScanner iterator = table.getScanner(scan); List<byte[]> words = new ArrayList<byte[]>(); for ( Result r : iterator ) { if ( null == r) continue; if ( r.isEmpty()) continue; byte[] term = r.getValue(IOConstants.DICTIONARY_BYTES,IOConstants.DICTIONARY_TERM_BYTES); if ( null != term && term.length > 0 ) words.add(term); } lastProcessingTime = now; return true; } catch (IOException ex) { DictionaryLog.l.error("DictionaryRefresh : Failure", ex); throw new SystemFault(ex); } } }