/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.sohospace.dictionary;
import java.util.HashMap;
import java.util.Map;
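/**
 * A hash + binary-search implementation of {@link Dictionary}.
 * <p>
 * Intended for large word lists in which many strings share the same leading
 * character, e.g. a Chinese word dictionary. In that situation lookups are
 * expected to be faster than with a plain binary dictionary.
 * <p>
 *
 * HashBinaryDictionary takes an already sorted array of words as input and
 * puts all words that share the same <b>head character</b> into one group,
 * which becomes a sub-dictionary (a BinaryDictionary for small groups, a
 * nested HashBinaryDictionary keyed on the next character for larger ones).
 * To look up a word, the sub-dictionary is first selected by that character
 * and the word is then located inside the sub-dictionary.
 * <p>
 *
 * @author Zhiliang Wang [qieqie.wang@gmail.com]
 *
 * @see BinaryDictionary
 *
 * @since 1.0
 *
 */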
public class HashBinaryDictionary implements Dictionary {

    // -------------------------------------------------

    /**
     * The complete word array. This dictionary covers only the slice
     * {@code [start, end)} of it; {@link #get(int)} reads from this array.
     * The grouping logic requires words that share the same character at
     * {@link #hashIndex} to be adjacent (i.e. the array must be sorted).
     */
    private final String[] ascWords;

    /**
     * Maps the key of a hash character (see {@link #keyOf(char)}) to the
     * sub-dictionary holding the words that carry that character at
     * position {@link #hashIndex}.
     */
    private final Map<Integer, SubDictionaryWrap> subs;

    /** Position inside each word of the character used for grouping. */
    private final int hashIndex;

    /** Index in {@link #ascWords} of the first word covered (inclusive). */
    private final int start;

    /** Index in {@link #ascWords} just past the last word covered (exclusive). */
    private final int end;

    /** Number of words covered, i.e. {@code end - start}. */
    private final int count;

    // -------------------------------------------------

    /**
     * Creates a dictionary over the whole array, grouping on the first
     * character of each word.
     *
     * @param ascWords
     *            the sorted word array
     * @param initialCapacity
     *            initial capacity of the internal hash map
     * @param loadFactor
     *            load factor of the internal hash map
     */
    public HashBinaryDictionary(String[] ascWords, int initialCapacity,
            float loadFactor) {
        this(ascWords, 0, 0, ascWords.length, initialCapacity, loadFactor);
    }

    /**
     * Creates a dictionary over the slice {@code [start, end)} of the array,
     * grouping on the character at position {@code hashIndex}.
     *
     * @param ascWords
     *            the sorted word array
     * @param hashIndex
     *            position of the character used to group the words
     * @param start
     *            index of the first covered word (inclusive)
     * @param end
     *            index just past the last covered word (exclusive)
     * @param initialCapacity
     *            initial capacity of the internal hash map
     * @param loadFactor
     *            load factor of the internal hash map
     */
    public HashBinaryDictionary(String[] ascWords, int hashIndex, int start, int end,
            int initialCapacity, float loadFactor) {
        this.ascWords = ascWords;
        this.start = start;
        this.end = end;
        this.count = end - start;
        this.hashIndex = hashIndex;
        this.subs = new HashMap<Integer, SubDictionaryWrap>(initialCapacity, loadFactor);
        createSubDictionaries();
    }

    // -------------------------------------------------

    /**
     * Builds the character-to-sub-dictionary map; called by the constructor.
     * <p>
     * Walks the covered slice once and cuts it into runs of consecutive
     * words that have the same character at {@link #hashIndex}; every run
     * becomes one sub-dictionary. An empty slice produces no sub-dictionary.
     */
    protected void createSubDictionaries() {
        // Nothing to index: without this guard ascWords[start] below would
        // throw for an empty array / empty slice.
        if (this.start >= this.end) {
            return;
        }
        int beginIndex = this.start;
        char beginHashChar = getChar(ascWords[this.start], hashIndex);
        for (int endIndex = this.start + 1; endIndex < this.end; endIndex++) {
            char endHashChar = getChar(ascWords[endIndex], hashIndex);
            if (endHashChar != beginHashChar) {
                // The run [beginIndex, endIndex) is complete.
                addSubDictionary(beginHashChar, beginIndex, endIndex);
                beginIndex = endIndex;
                beginHashChar = endHashChar;
            }
        }
        // The last run always extends to the end of the slice.
        addSubDictionary(beginHashChar, beginIndex, this.end);
    }

    /**
     * Returns the character of {@code s} at {@code index}, or {@code (char) 0}
     * when the string is too short to have one.
     *
     * @param s
     *            the word to read from
     * @param index
     *            the character position
     * @return the character at that position, or {@code (char) 0}
     */
    protected char getChar(String s, int index) {
        if (index >= s.length()) {
            return (char) 0;
        }
        return s.charAt(index);
    }

    /**
     * Registers the words in {@code [beginIndex, endIndex)} as the
     * sub-dictionary for {@code hashChar}.
     * <p>
     * If a sub-dictionary is already registered for the same key, a message
     * is printed to standard output and the earlier entry is replaced.
     *
     * @param hashChar
     *            the character shared by the words at {@link #hashIndex}
     * @param beginIndex
     *            index of the first word of the group (inclusive)
     * @param endIndex
     *            index just past the last word of the group (exclusive)
     */
    protected void addSubDictionary(char hashChar, int beginIndex, int endIndex) {
        SubDictionaryWrap subDic = new SubDictionaryWrap(
                hashChar,
                createSubDictionary(ascWords, beginIndex, endIndex),
                beginIndex);
        Integer key = keyOf(hashChar);
        if (subs.containsKey(key)) {
            // Same hash character seen in two separate runs.
            System.out.println("����������֣���ʾ����Ĵ������������ȷ���ʵ�������ȷ>>>>>>>>>"
                    + hashChar);
        }
        subs.put(key, subDic);
    }

    /**
     * Creates the dictionary used for one group of words.
     * <p>
     * Groups of fewer than 16 words use a {@code BinaryDictionary}. Larger
     * groups use a nested {@code HashBinaryDictionary} that groups on the
     * next character position, provided at least one word actually has a
     * character there.
     *
     * @param ascWords
     *            the sorted word array
     * @param beginIndex
     *            index of the first word of the group (inclusive)
     * @param endIndex
     *            index just past the last word of the group (exclusive)
     * @return the dictionary covering the group
     */
    protected Dictionary createSubDictionary(String[] ascWords, int beginIndex, int endIndex) {
        int count = endIndex - beginIndex;
        // If no word reaches position hashIndex + 1, a nested hash dictionary
        // would see (char) 0 for every word, form the very same single group
        // again and recurse without end (e.g. 16+ duplicate entries). Fall
        // back to binary search in that case.
        if (count < 16 || !hasWordLongerThan(ascWords, beginIndex, endIndex, hashIndex + 1)) {
            return new BinaryDictionary(ascWords, beginIndex, endIndex);
        }
        return new HashBinaryDictionary(
                ascWords,
                hashIndex + 1,
                beginIndex,
                endIndex,
                getCapacity(count),
                0.75f);
    }

    /**
     * Tells whether any word in {@code [beginIndex, endIndex)} is longer than
     * {@code length} characters.
     */
    private static boolean hasWordLongerThan(String[] words, int beginIndex, int endIndex,
            int length) {
        for (int i = beginIndex; i < endIndex; i++) {
            if (words[i].length() > length) {
                return true;
            }
        }
        return false;
    }

    /**
     * Candidate hash map capacities, in ascending order.
     * NOTE(review): the last entry (10192) is not a power of two, unlike the
     * others; it may have been meant as 8192 - confirm before changing.
     */
    protected static final int[] capacityCandiate = {16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 10192};

    /**
     * Picks the initial hash map capacity for a group of {@code count} words:
     * the smallest candidate that is at least {@code count * 4 / 3 + 1}, or
     * the largest candidate when none is big enough.
     *
     * @param count
     *            number of words in the group
     * @return the chosen capacity
     */
    protected int getCapacity(int count) {
        int wanted = (count << 2) / 3 + 1;
        for (int candidate : capacityCandiate) {
            if (wanted <= candidate) {
                return candidate;
            }
        }
        return capacityCandiate[capacityCandiate.length - 1];
    }

    // -------------------------------------------------

    /**
     * Returns the word at the given position, counted from the first word
     * covered by this dictionary.
     *
     * @param index
     *            position relative to {@link #start}
     * @return the word stored at {@code start + index}
     */
    public String get(int index) {
        return ascWords[start + index];
    }

    /**
     * Looks up the {@code count} characters of {@code input} that begin at
     * {@code begin}.
     * <p>
     * The sub-dictionary is selected by the input character at
     * {@code begin + hashIndex}; when there is none, {@code Hit.UNDEFINED}
     * is returned. If the input ends exactly at the hash character, the
     * first word of the sub-dictionary decides the result; otherwise the
     * search is delegated to the sub-dictionary.
     * <p>
     * NOTE(review): {@code wordIndexOffset} is an index into the full word
     * array, whereas {@link #get(int)} and {@link #size()} are relative to
     * this dictionary's slice; the index arithmetic below is kept as it was
     * and should be checked against {@code BinaryDictionary}.
     *
     * @param input
     *            the text being searched
     * @param begin
     *            offset of the first character to match
     * @param count
     *            number of characters to match
     * @return the search result
     */
    public Hit search(CharSequence input, int begin, int count) {
        SubDictionaryWrap subDic = subs.get(keyOf(input.charAt(hashIndex + begin)));
        if (subDic == null) {
            return Hit.UNDEFINED;
        }
        Dictionary dic = subDic.dic;
        // count == hashIndex + 1: the input ends at the hash character, so
        // only the first word of the group can match it exactly.
        if (count == hashIndex + 1) {
            String header = dic.get(0);
            if (header.length() == hashIndex + 1) {
                if (subDic.wordIndexOffset + 1 < this.ascWords.length) {
                    return new Hit(subDic.wordIndexOffset, header,
                            this.ascWords[subDic.wordIndexOffset + 1]);
                }
                else {
                    return new Hit(subDic.wordIndexOffset, header, null);
                }
            }
            else {
                // The first word is longer than the input: report it as the
                // next word of a not-yet-complete match.
                return new Hit(Hit.UNCLOSED_INDEX, null, header);
            }
        }
        // count > hashIndex + 1
        Hit word = dic.search(input, begin, count);
        if (word.isHit()) {
            int index = subDic.wordIndexOffset + word.getIndex();
            word.setIndex(index);
            // The extra bound check keeps get(index + 1) from reading past
            // the end of the word array when the hit is its last word.
            if (word.getNext() == null && index < size()
                    && start + index + 1 < ascWords.length) {
                word.setNext(get(index + 1));
            }
        }
        return word;
    }

    /**
     * Returns the number of words covered by this dictionary.
     *
     * @return {@code end - start}
     */
    public int size() {
        return count;
    }

    // -------------------------------------------------

    /**
     * Converts a character into its key in {@link #subs}. The key is simply
     * the character's numeric value.
     *
     * @param theChar
     *            the hash character
     * @return the map key for that character
     *
     * @see #subs
     */
    protected int keyOf(char theChar) {
        return theChar;
    }

    /**
     * Holder that pairs a sub-dictionary with its hash character and the
     * position of its first word.
     */
    static class SubDictionaryWrap {
        /**
         * The character shared by the words of the sub-dictionary.
         */
        char hashChar;

        /**
         * The sub-dictionary itself.
         */
        Dictionary dic;

        /**
         * Index in the full word array of the sub-dictionary's first word.
         */
        int wordIndexOffset;

        public SubDictionaryWrap(char hashChar, Dictionary dic, int wordIndexOffset) {
            this.hashChar = hashChar;
            this.dic = dic;
            this.wordIndexOffset = wordIndexOffset;
        }
    }
}