/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.sohospace.dictionary.support;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Set;
import com.sohospace.dictionary.support.filewords.FileWordsReader;
/**
*
* @author Zhiliang Wang [qieqie.wang@gmail.com]
*
* @since 1.0
*
*/
public class Utils {
/**
 * Compares a region of {@code one} with the whole of {@code theOther}, character by character.
 * <p>
 * The region starts at index {@code begin} of {@code one} and is at most {@code count}
 * characters long. If fewer than {@code count} characters are available from {@code begin},
 * only the available characters form the region, so a truncated region is never reported as
 * equal to a longer {@code theOther}.
 *
 * @param one      the sequence that contains the region to compare
 * @param begin    index in {@code one} of the first character of the region
 * @param count    requested length of the region
 * @param theOther the sequence the region is compared against
 * @return {@code 1} or {@code -1} when the first differing character of the region is
 *         greater or smaller than the corresponding character of {@code theOther};
 *         otherwise the region length minus {@code theOther.length()} (zero when equal)
 */
public static int compare(CharSequence one, int begin, int count, CharSequence theOther) {
    // Clamp the region to the characters that really exist in "one"; without this a region
    // that runs past the end would be compared only partially yet measured by "count".
    final int regionLength = Math.max(0, Math.min(count, one.length() - begin));
    final int otherLength = theOther.length();
    // Only the common prefix length needs a character-by-character comparison.
    final int shared = Math.min(regionLength, otherLength);
    for (int k = 0; k < shared; k++) {
        final char left = one.charAt(begin + k);
        final char right = theOther.charAt(k);
        if (left != right) {
            return left > right ? 1 : -1;
        }
    }
    // All shared characters match: the shorter side sorts first.
    return regionLength - otherLength;
}
/**
 * Ad-hoc command-line helper: loads one dictionary file, removes duplicate words, sorts the
 * remaining words in natural {@code String} order and prints them one per line, followed by
 * a line consisting of {@code "-"} and the number of distinct words.
 *
 * @param args ignored
 * @throws IOException declared for the dictionary read performed by {@code FileWordsReader}
 */
public static void main(String[] args) throws IOException {
    final String dir = "CJK/locale/";
    // NOTE(review): this literal looks damaged by an encoding mismatch in this copy of the
    // file; confirm it against the real dictionary file name. Earlier revisions listed other
    // candidate names here (for example "base" and "xcharacter"); the non-ASCII ones are
    // unreadable in this copy.
    final String name = "����";
    final String path = "dic/" + dir + name + ".dic";

    // readWords(...) is indexed by the dictionary name to obtain this file's word list.
    LinkedList<String> loaded = FileWordsReader.readWords(path).get(name);

    // Going through a HashSet drops duplicate entries before sorting.
    String[] sorted = new HashSet<String>(loaded).toArray(new String[0]);
    Arrays.sort(sorted);

    for (String word : sorted) {
        System.out.println(word);
    }
    System.out.println("-" + sorted.length);
}
/**
 * Ad-hoc helper (not a real entry point): loads the {@code base} dictionary, removes
 * duplicate words, sorts the remaining words in natural {@code String} order and prints
 * them one per line, followed by a line consisting of {@code "-"} and the number of
 * distinct words.
 *
 * @param args ignored
 * @throws IOException declared for the dictionary read performed by {@code FileWordsReader}
 */
public static void main0(String[] args) throws IOException {
    // The directory needs its trailing separator: the path is built by plain concatenation,
    // so "CJK" + "base" would yield "dic/CJKbase.dic" instead of "dic/CJK/base.dic".
    final String dir = "CJK/";
    // Other dictionary names were tried here in earlier revisions (for example
    // "xcharacter"); the non-ASCII ones are unreadable in this copy of the file.
    final String name = "base";

    // readWords(...) is indexed by the dictionary name to obtain this file's word list.
    LinkedList<String> words = FileWordsReader.readWords(
            "dic/" + dir + name + ".dic").get(name);

    // Going through a HashSet drops duplicate entries before sorting.
    Set<String> distinct = new HashSet<String>(words);
    String[] sorted = distinct.toArray(new String[0]);
    Arrays.sort(sorted);

    for (String word : sorted) {
        System.out.println(word);
    }
    System.out.println("-" + sorted.length);
}
/**
 * Ad-hoc helper (not a real entry point): loads the word list of "dic/CJK/base.dic" and
 * removes from the in-memory list every word that consists solely of characters from a
 * fixed character set, printing each removed word. The list size is printed before and
 * after the filtering. The filtered list is only held in a local variable; nothing is
 * written back by this method.
 * <p>
 * NOTE(review): the non-ASCII identifiers and the char/String literals below are garbled
 * in this copy of the file (apparently an encoding mismatch). Presumably the character set
 * holds Chinese numeral characters -- confirm against a correctly encoded original.
 *
 * @param args ignored
 * @throws IOException declared for the dictionary read performed by FileWordsReader
 */
public static void main5(String[] args) throws IOException {
String dir = "CJK/";
String name = "base";
// Set of allowed character codes; each char is stored as its int code so that the
// membership test further down can use the (int) value of charAt(i).
HashSet<Integer> �ַ��� = new HashSet<Integer>();
�ַ���.add((int)'��');
�ַ���.add((int)'һ');
�ַ���.add((int)'��');
�ַ���.add((int)'��');
�ַ���.add((int)'��');
�ַ���.add((int)'��');
�ַ���.add((int)'��');
�ַ���.add((int)'��');
�ַ���.add((int)'��');
�ַ���.add((int)'��');
�ַ���.add((int)'��');
�ַ���.add((int)'��');
�ַ���.add((int)'ʮ');
�ַ���.add((int)'��');
�ַ���.add((int)'ǧ');
�ַ���.add((int)'��');
�ַ���.add((int)'��');
// The result of readWords(...) is indexed by the dictionary name to get this file's words.
LinkedList<String> words = FileWordsReader.readWords(
"dic/" + dir + name + ".dic").get(name);
// Size before filtering.
System.out.println(words.size());
// An explicit Iterator is used so that words can be removed while iterating.
Iterator<String> iter = words.iterator();
while (iter.hasNext()) {
String Ԫ�� = (String) iter.next();
// Debug trace for one specific word.
if (Ԫ��.equals("��ʮ��")) {
System.out.println("--" + Ԫ��);
}
// Advance i to the first character that is NOT in the allowed set;
// i ends up equal to the word length when every character is allowed.
int i = 0;
for (; i < Ԫ��.length(); i++) {
if (!�ַ���.contains((int)Ԫ��.charAt(i))) {
break;
}
}
// Debug trace: index at which the scan stopped for that same specific word.
if (Ԫ��.equals("��ʮ��")) {
System.out.println(i);
}
// The whole word is made of allowed characters (also true for an empty word):
// print it and drop it from the list.
if (i == Ԫ��.length()) {
System.out.println(Ԫ��);
iter.remove();
}
}
// Size after filtering.
System.out.println(words.size());
}
/**
 * Constant predicate: answers {@code true} for every character.
 *
 * @param c the character to test; it is not inspected
 * @return always {@code true}
 */
public static boolean outb(char c) {
    // No condition is evaluated on the argument.
    return true;
}
/**
 * Converts a full-width character to its half-width (DBC case) counterpart.
 * <p>
 * The full-width space (code 12288) becomes the half-width space (code 32). Characters
 * with codes 65281 through 65374 are shifted down by 65248, which maps them onto codes
 * 33 through 126. Any other character is returned unchanged.
 *
 * @param src the character to convert
 * @return the half-width counterpart of {@code src}, or {@code src} itself when it is not
 *         one of the characters described above
 */
public static char toDbcCase(char src) {
    // U+3000 (12288): full-width space -> ASCII space.
    if (src == '\u3000') {
        return ' ';
    }
    // U+FF01..U+FF5E (65281..65374) sit exactly 0xFEE0 (65248) above codes 33..126.
    final boolean inShiftedRange = src >= '\uFF01' && src <= '\uFF5E';
    if (inShiftedRange) {
        return (char) (src - 0xFEE0);
    }
    return src;
}
}