package lia.analysis.i18n;
/**
* Copyright Manning Publications Co.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.awt.Font;
import java.awt.FontMetrics;
import java.awt.Frame;
import java.awt.Label;
import java.awt.event.WindowAdapter;
import java.awt.event.WindowEvent;
import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.analysis.cn.ChineseAnalyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.Version;
// From chapter 4
/**
 * Shows how several Lucene analyzers tokenize the same Chinese text.
 *
 * <p>Every sample string is run through every configured analyzer. The
 * resulting tokens are wrapped in square brackets and displayed in a separate
 * AWT window whose title is the analyzer's simple class name followed by the
 * analyzed text.
 */
public class ChineseDemo {
  private static final String[] strings = {"道德經"};  //A

  private static final Analyzer[] analyzers = {
    new SimpleAnalyzer(),
    new StandardAnalyzer(Version.LUCENE_30),
    new ChineseAnalyzer(),  //B
    new CJKAnalyzer(Version.LUCENE_30),
    new SmartChineseAnalyzer(Version.LUCENE_30)
  };

  /**
   * Analyzes each sample string with each analyzer, opening one window per
   * (string, analyzer) pair.
   *
   * @param args ignored
   * @throws Exception if an analyzer fails while reading the text
   */
  public static void main(String[] args) throws Exception {
    for (String string : strings) {
      for (Analyzer analyzer : analyzers) {
        analyze(string, analyzer);
      }
    }
  }

  /**
   * Tokenizes {@code string} with {@code analyzer} and displays the tokens.
   *
   * @param string the text to analyze
   * @param analyzer the analyzer to run over the text
   * @throws IOException if the token stream cannot be read or closed
   */
  private static void analyze(String string, Analyzer analyzer)
      throws IOException {
    String output = tokenize(string, analyzer);
    display(analyzer.getClass().getSimpleName() + " : " + string, output);
  }

  /**
   * Runs {@code analyzer} over {@code string} and renders each token as
   * {@code "[term] "}, concatenated in stream order.
   *
   * @param string the text to analyze
   * @param analyzer the analyzer to run over the text
   * @return the bracketed tokens; empty if the analyzer produced none
   * @throws IOException if the token stream cannot be read or closed
   */
  private static String tokenize(String string, Analyzer analyzer)
      throws IOException {
    StringBuilder buffer = new StringBuilder();
    TokenStream stream = analyzer.tokenStream("contents",
                                              new StringReader(string));
    try {
      TermAttribute term = stream.addAttribute(TermAttribute.class);
      // Follow the TokenStream consumer workflow:
      // reset -> incrementToken* -> end -> close.
      stream.reset();
      while (stream.incrementToken()) {  //C
        buffer.append("[").append(term.term()).append("] ");
      }
      stream.end();
    } finally {
      // Always release the stream (and its reader), even if analysis fails.
      stream.close();
    }
    return buffer.toString();
  }

  /**
   * Opens a resizable window showing {@code output} centered in a 36pt font.
   * The window is at least 250 pixels wide; otherwise it is the rendered text
   * width plus 50 pixels.
   *
   * @param title the window title
   * @param output the text to display
   */
  private static void display(String title, String output) {
    final Frame f = new Frame();
    f.setTitle(title);
    f.setResizable(true);
    // An AWT Frame ignores its close button unless a listener handles it;
    // dispose the frame so the demo windows can actually be closed.
    f.addWindowListener(new WindowAdapter() {
      @Override
      public void windowClosing(WindowEvent e) {
        f.dispose();
      }
    });

    Font font = new Font(null, Font.PLAIN, 36);
    int width = getWidth(f.getFontMetrics(font), output);
    f.setSize((width < 250) ? 250 : width + 50, 75);

    // NOTE: if Label doesn't render the Chinese characters
    // properly, try using javax.swing.JLabel instead
    Label label = new Label(output);  //D
    label.setSize(width, 75);
    label.setAlignment(Label.CENTER);
    label.setFont(font);
    f.add(label);
    f.setVisible(true);
  }

  /**
   * Returns the advance width of {@code s} when drawn with the font described
   * by {@code metrics}.
   *
   * <p>The whole string is measured at once rather than summing per-{@code char}
   * widths, so characters encoded as surrogate pairs are measured correctly.
   *
   * @param metrics the metrics of the font used for rendering
   * @param s the text to measure
   * @return the total advance width in pixels
   */
  private static int getWidth(FontMetrics metrics, String s) {
    return metrics.stringWidth(s);
  }
}
/*
#A Analyze this text
#B Test these analyzers
#C Retrieve tokens
#D Display analysis
*/