/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package net.paoding.analysis.dictionary.support.filewords;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Map;
import net.paoding.analysis.dictionary.Word;
import net.paoding.analysis.knife.CharSet;
/**
 * A {@link ReadListener} that gathers the words of every accepted dictionary
 * file into a set and exposes those sets keyed by the file name with the
 * configured extension removed.
 *
 * @author Zhiliang Wang [qieqie.wang@gmail.com]
 *
 * @since 1.0
 *
 */
public class SimpleReadListener implements ReadListener {

    /** Collected dictionaries: file name without extension -> set of words. */
    private final Map/* <String, Set<Word>> */dics = new Hashtable/* <String, Set<Word>> */();

    /** Words of the file currently being read; reset to null by {@link #onFileEnd(String)}. */
    private HashSet/* <Word> */words = new HashSet/* <Word> */();

    /** File name suffix a file must carry to be accepted. */
    private String ext = ".dic";

    /**
     * Creates a listener that accepts files whose name ends with the given suffix.
     *
     * @param ext the required file name suffix, e.g. ".dic"
     */
    public SimpleReadListener(String ext) {
        this.ext = ext;
    }

    /**
     * Creates a listener that accepts files whose name ends with ".dic".
     */
    public SimpleReadListener() {
    }

    /**
     * Starts a fresh word set for the given file if its name ends with the
     * configured suffix.
     *
     * @param file the name of the file about to be read
     * @return true if the file is accepted, false if its name does not end
     *         with the configured suffix
     */
    public boolean onFileBegin(String file) {
        if (!file.endsWith(ext)) {
            return false;
        }
        words = new HashSet/* <Word> */();
        return true;
    }

    /**
     * Stores the words collected for the given file under the file name with
     * the configured suffix removed, then clears the current word set.
     *
     * @param file the name of the file that has been read
     */
    public void onFileEnd(String file) {
        // Strip the configured suffix rather than a fixed 4 characters, so that
        // suffixes other than the default ".dic" produce the right key.
        String name = file.endsWith(ext)
                ? file.substring(0, file.length() - ext.length())
                : file;
        dics.put(name, words);
        words = null;
    }

    /**
     * Handles one dictionary entry. The text is trimmed and lower-cased; blank
     * entries and entries starting with '#' or '-' are ignored, as are entries
     * rejected by the numeral filter. An entry may carry a bracketed annotation
     * such as <code>word[m=3]</code>; the part before '[' becomes the word and
     * the integer after "m=" is passed to <code>Word.setModifiers</code>.
     *
     * @param wordText the raw entry text
     * @throws NumberFormatException if the value after "m=" is not an integer
     */
    public void onWord(String wordText) {
        // Use a fixed locale so that lower-casing does not depend on the JVM's
        // default locale (e.g. "I" -> dotless i under a Turkish locale).
        wordText = wordText.trim().toLowerCase(java.util.Locale.ENGLISH);
        if (wordText.length() == 0 || wordText.charAt(0) == '#'
                || wordText.charAt(0) == '-') {
            return;
        }
        // Drop words written as Chinese numerals.
        if (isNumeralWord(wordText)) {
            return;
        }
        int index = wordText.indexOf('[');
        if (index == -1) {
            words.add(new Word(wordText));
            return;
        }
        Word w = new Word(wordText.substring(0, index));
        int mindex = wordText.indexOf("m=", index);
        if (mindex != -1) {
            int mEndIndex = wordText.indexOf("]", mindex);
            if (mEndIndex == -1) {
                // No closing bracket: take the value up to the end of the entry
                // instead of failing with an index error.
                mEndIndex = wordText.length();
            }
            String m = wordText.substring(mindex + "m=".length(), mEndIndex).trim();
            w.setModifiers(Integer.parseInt(m));
        }
        // The word is kept whether or not the annotation carries "m=".
        words.add(w);
    }

    /**
     * Returns the dictionaries collected so far.
     *
     * @return the internal map from file name (suffix removed) to the set of
     *         words read from that file; the map is not copied
     */
    public Map/* <String, Set<Word>> */getResult() {
        return dics;
    }

    /**
     * Tells whether the entry should be dropped by the numeral filter: every
     * character maps to a non-negative value via <code>CharSet.toNumber</code>,
     * the first character's value is not greater than 10, and the entry is not
     * one of the exempted "两" forms.
     */
    private static boolean isNumeralWord(String wordText) {
        for (int i = 0; i < wordText.length(); i++) {
            int num = CharSet.toNumber(wordText.charAt(i));
            if (num < 0) {
                // A character without a numeric value: keep the word.
                return false;
            }
            if (i == 0 && num > 10) {// "十二" vs "千万"
                // Words led by a value above 10 are kept
                // (presumably large units such as "千万" - confirm against CharSet).
                return false;
            }
            if (num == 2
                    && (wordText.equals("两") || wordText.equals("两两"))) {
                return false;
            }
            if (i + 1 == wordText.length()) {
                // Reached the last character and all of them were numeric.
                return true;
            }
        }
        return false;
    }
}