package org.fastcatsearch.plugin.analysis; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.core.AnalyzerOption; import org.apache.lucene.analysis.tokenattributes.*; import org.fastcatsearch.env.Environment; import org.fastcatsearch.ir.analysis.AnalyzerPool; import org.fastcatsearch.ir.io.CharVector; import org.fastcatsearch.plugin.LicenseInvalidException; import org.fastcatsearch.plugin.Plugin; import org.fastcatsearch.plugin.PluginSetting; import org.fastcatsearch.settings.SettingFileNames; import org.fastcatsearch.util.DynamicClassLoader; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import javax.xml.bind.JAXBContext; import javax.xml.bind.JAXBException; import javax.xml.bind.Unmarshaller; import java.io.*; import java.util.Iterator; import java.util.List; import java.util.Scanner; /** * Created by swsong on 2015. 7. 10.. */ public class RunAnalyzer { protected static Logger logger = LoggerFactory.getLogger(RunAnalyzer.class); private AnalysisPlugin plugin; private String pluginId; public static void main(String[] args) throws IOException { if (args.length != 3) { printUsage(); System.exit(0); } File pluginDir = new File(args[0]); String pluginClassName = args[1]; String analyzerId = args[2]; RunAnalyzer runAnalyzer = new RunAnalyzer(pluginDir, pluginClassName); AnalyzerPool analyzerPool = runAnalyzer.getAnalyzerPool(analyzerId); Analyzer analyzer = null; try { analyzer = analyzerPool.getFromPool(); //사용자 입력을 계속 받아들인다. Scanner sc = new Scanner(System.in); System.out.println("=================================="); System.out.println(" Fastcat analyzer"); System.out.println(" Enter 'quit' for exit program. "); System.out.println("=================================="); System.out.print("Input String: "); while (sc.hasNextLine()) { String str = sc.nextLine(); if (str.equalsIgnoreCase("quit")) { break; } try { char[] value = str.toCharArray(); TokenStream tokenStream = analyzer.tokenStream("", new CharArrayReader(value), new AnalyzerOption()); tokenStream.reset(); CharsRefTermAttribute termAttribute = null; if (tokenStream.hasAttribute(CharsRefTermAttribute.class)) { termAttribute = tokenStream.getAttribute(CharsRefTermAttribute.class); } SynonymAttribute synonymAttribute = null; if (tokenStream.hasAttribute(SynonymAttribute.class)) { synonymAttribute = tokenStream.getAttribute(SynonymAttribute.class); } AdditionalTermAttribute additionalTermAttribute = null; if (tokenStream.hasAttribute(AdditionalTermAttribute.class)) { additionalTermAttribute = tokenStream.getAttribute(AdditionalTermAttribute.class); } StopwordAttribute stopwordAttribute = null; if (tokenStream.hasAttribute(StopwordAttribute.class)) { stopwordAttribute = tokenStream.getAttribute(StopwordAttribute.class); } CharTermAttribute charTermAttribute = tokenStream.getAttribute(CharTermAttribute.class); while (tokenStream.incrementToken()) { String word = ""; //기본적으로 분석된 단어는 CharsRefTermAttribute 에 들어있다. if (termAttribute != null) { word = termAttribute.toString(); } else { //CharsRefTermAttribute 에 넣지 않는 분석기의 경우 CharTermAttribute 에 들어있게 된다. word = charTermAttribute.toString(); } //불용어로 판단되면 건너뛴다. if (stopwordAttribute.isStopword()) { continue; } // // 분석된 단어를 출력한다. // System.out.print(">> "); System.out.println(word); //유사어가 존재하면 리스트를 출력한다. if (synonymAttribute != null) { List synonyms = synonymAttribute.getSynonyms(); if (synonyms != null) { for (Object synonymObj : synonyms) { if (synonymObj instanceof CharVector) { CharVector synonym = (CharVector) synonymObj; System.out.print("S> "); System.out.println(synonym); } else if (synonymObj instanceof List) { List synonymList = (List) synonymObj; for (Object synonym : synonymList) { System.out.print("S> "); System.out.println(synonym); } } } } } //추가단어가 존재하면 출력한다. //추가단어는 상품명분석기에서 규칙에 의해 추가로 생성되는 단어들이며, 일반적으로는 존재하지 않는다. if (additionalTermAttribute != null && additionalTermAttribute.size() > 0) { Iterator<String> termIter = additionalTermAttribute.iterateAdditionalTerms(); while (termIter.hasNext()) { String token = termIter.next(); System.out.print("A> "); System.out.println(word); } } } } catch (IOException e) { e.printStackTrace(); } System.out.print("Input String: "); } } finally { if (analyzer != null) { analyzerPool.releaseToPool(analyzer); } } System.out.print("Bye!"); } private static void printUsage() { System.out.println("Usage : java " + RunAnalyzer.class.getName() + " <pluginDir> <pluginClassName> <analyzerId>"); System.out.println("Example"); System.out.println("$ java " + RunAnalyzer.class.getName()+ " plugin/analysis/Korean org.fastcatsearch.plugin.analysis.ko.KoreanAnalysisPlugin standard"); } public RunAnalyzer(File pluginDir, String pluginClassName) { Environment env = new Environment(pluginDir.getAbsolutePath()); File pluginConfigFile = new File(pluginDir, SettingFileNames.pluginConfig); try { InputStream is = new FileInputStream(pluginConfigFile); JAXBContext analysisJc = JAXBContext.newInstance(AnalysisPluginSetting.class); Unmarshaller analysisUnmarshaller = analysisJc.createUnmarshaller(); PluginSetting pluginSetting = (PluginSetting) analysisUnmarshaller.unmarshal(is); String serverId = env.getServerId(); boolean useDB = false; plugin = (AnalysisPlugin) DynamicClassLoader.loadObject(pluginClassName, Plugin.class, new Class<?>[]{File.class, PluginSetting.class, String.class}, new Object[]{pluginDir, pluginSetting, serverId}); plugin.load(useDB); pluginId = plugin.getPluginSetting().getId(); } catch (FileNotFoundException e) { logger.error("{} plugin 설정파일을 읽을수 없음.", pluginDir.getName()); } catch (JAXBException e) { logger.error("plugin 설정파일을 읽는중 에러. {}", e.getMessage()); } catch (IOException e) { logger.error("IO에러발생. {}", e.getMessage()); } catch (LicenseInvalidException e) { logger.error("라이선스가 유효하지 않습니다. {}", e.getMessage()); } catch (Exception e) { e.printStackTrace(); } } public AnalyzerPool getAnalyzerPool(String analyzerId) { if(plugin.isLoaded()) { AnalyzerPool pool = plugin.getAnalyzerPool(analyzerId); if(pool != null) { return pool; } else { throw new RuntimeException("Cannot find analyzer >> " + (pluginId + "." + analyzerId)); } } return null; } }