/** * @version $Id: DicCompileExecution.java 1839 2014-04-16 02:33:51Z yukihiro-kinjyo $ * * 2011/08/30 11:48:56 * @author sanenori-makiya * * Copyright 2011-2014 TIDAコンソーシアム All Rights Reserved. */ package com.tida_okinawa.corona.correction.morphem.compile; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileFilter; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.OutputStreamWriter; import java.io.PrintStream; import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.Arrays; import java.util.Iterator; import java.util.List; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import org.eclipse.core.runtime.IProgressMonitor; import org.eclipse.core.runtime.NullProgressMonitor; import com.tida_okinawa.corona.common.DefaultLogger; import com.tida_okinawa.corona.common.Encoding; import com.tida_okinawa.corona.common.ILogger; import com.tida_okinawa.corona.correction.common.ExternalProgramExec; import com.tida_okinawa.corona.correction.common.ExternalProgramExitException; import com.tida_okinawa.corona.correction.common.FileUtil; import com.tida_okinawa.corona.correction.morphem.preference.MorphemePreference; /** * 形態素解析に使う辞書ファイルのコンパイルを実行するクラス * * @author sanenori-makiya, imai */ public class DicCompileExecution { final String DICPATH = MorphemePreference.getJumanDicDir().getAbsolutePath(); final String MAKEMAT = DICPATH + File.separator + "makemat"; //$NON-NLS-1$ final String MAKEINT = DICPATH + File.separator + "makeint"; //$NON-NLS-1$ final String MAKEPAT = DICPATH + File.separator + "makepat"; //$NON-NLS-1$ final String DICSORT = DICPATH + File.separator + "dicsort"; //$NON-NLS-1$ /** * コンソール */ final ILogger logger; /** * プログレスモニタ */ final IProgressMonitor monitor; /** * Jumanが使う辞書ファイルのリスト */ final static String[] jumanDicFilenames = { "jumandic.dat", "jumandic.mat", "jumandic.pat", "jumandic.tab", }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ /** * JUMANの標準辞書 */ final static String[] defaultDics = { "Assert.dic", "AuxV.dic", "ContentW.dic", "Demonstrative.dic", "Emoticon.dic", "Noun.hukusi.dic", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$ //$NON-NLS-6$ "Noun.keishiki.dic", "Noun.koyuu.dic", "Noun.suusi.dic", "Postp.dic", "Prefix.dic", "Rengo.dic", "Special.dic", "Suffix.dic", }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$ //$NON-NLS-6$ //$NON-NLS-7$ //$NON-NLS-8$ /** * コンストラクター * * @param logger * コマンドの出力先 null:標準出力 * @param monitor * プログレスモニタ null: モニタなし */ public DicCompileExecution(ILogger logger, IProgressMonitor monitor) { if (logger == null) { logger = new DefaultLogger(); } this.logger = logger; if (monitor == null) { monitor = new NullProgressMonitor(); } this.monitor = monitor; } /** * Jumanが使う辞書ファイルのリストを取得 * * @return 辞書ファイル一覧 */ public File[] getJumanDicFiles() { File[] juman_dic_files = new File[jumanDicFilenames.length]; File dicDir = MorphemePreference.getJumanDicDir(); for (int i = 0; i < jumanDicFilenames.length; i++) { juman_dic_files[i] = new File(dicDir, jumanDicFilenames[i]); assert (juman_dic_files[i].exists()); } return juman_dic_files; } /** * juman/dic/makedic.bat の内容を実行する * batファイルを実行しない理由 * - 冗長な処理をしない(同じファイルを作成しなおさない) * - 並列実行 * * @param dics * dicファイルのリスト * @param force * dicファイルの更新がなくても jumandic.patを作る * @return 辞書ファイルを作成したか * @throws IOException * プロセスの入出力エラー * @throws InterruptedException * プロセスの入出力同期中の割り込み * @throws ExternalProgramExitException * 異常終了 */ public boolean compile(File[] dics, boolean force) throws IOException, InterruptedException, ExternalProgramExitException { monitor.beginTask("compile JUMAN dictionary.", 5); //$NON-NLS-1$ ExternalProgramExec epe = new ExternalProgramExec(); File base = MorphemePreference.getJumanDicDir(); // 標準辞書をマージの対象にいれる List<File> dicList = new ArrayList<File>(); for (String default_dic : defaultDics) { dicList.add(new File(base, default_dic)); } for (File dic : dics) { dicList.add(dic); } dics = dicList.toArray(dics); // いらないIntファイルの削除 boolean deleteFlg = deleteIntFile(dics); // 更新の有無をチェック // Memo static内部クラスにできるかもしれない無名内部クラス。 File[] update_dics = FileUtil.getFilterFiles(dics, new FileFilter() { @Override public boolean accept(File dicFile) { File intFile = FileUtil.transPathExtension(dicFile, "int"); //$NON-NLS-1$ return FileUtil.hasUpdate(dicFile, intFile); } }); if (update_dics.length == 0 && !deleteFlg) { if (!force) { logger.getOutStream().println(Messages.DicCompileExecution_logNoJumanUpdate); return false; } } /* * make mat */ monitor.subTask("makemat"); //$NON-NLS-1$ epe.exec(new String[] { MAKEMAT }, base, null, logger.getOutStream(), logger.getErrStream()); monitor.worked(1); /* * for %%f in (*.dic) do makeint %%f */ exec(update_dics, MAKEINT); monitor.worked(1); /* * copy /b *.int jumandic.txt */ File[] int_files = new File[dics.length]; for (int i = 0; i < dics.length; i++) { int_files[i] = FileUtil.transPathExtension(dics[i], "int"); //$NON-NLS-1$ } // note: 他のターゲット・プロジェクトのintファイルもある monitor.subTask("concat"); //$NON-NLS-1$ File jumandic_txt = new File(base, "jumandic.txt"); //$NON-NLS-1$ FileUtil.concatFiles(jumandic_txt, int_files); monitor.worked(1); /* * dic sort jumandic.txt > jumandic.dat */ monitor.subTask("dicsort"); //$NON-NLS-1$ File jumandic_dat = new File(base, "jumandic.dat"); //$NON-NLS-1$ FileOutputStream jumandic_dat_out = new FileOutputStream(jumandic_dat); try { epe.exec(new String[] { DICSORT, jumandic_txt.getAbsolutePath() }, base, null, jumandic_dat_out, logger.getErrStream()); } catch (IOException e) { throw e; } finally { jumandic_dat_out.close(); } monitor.worked(1); /* * make pat */ monitor.subTask("makepat"); //$NON-NLS-1$ epe.exec(new String[] { MAKEPAT }, base, null, logger.getOutStream(), logger.getErrStream()); monitor.worked(1); monitor.done(); logger.getOutStream().println(Messages.DicCompileExecution_logJumanUpdate); return true; } private static boolean deleteIntFile(File[] dics) { boolean delete = false; File dir = MorphemePreference.getJumanDicDir(); /* 全intファイル */ File[] intFiles = FileUtil.getExtensionSelectFiles(dir, "int"); //$NON-NLS-1$ List<File> intList = new ArrayList<File>(); List<File> dicList = new ArrayList<File>(); /* リストに移す */ for (File f : intFiles) { intList.add(f); } for (File f : dics) { dicList.add(f); } /* 同じものを省く */ for (Iterator<File> ietr = intList.iterator(); ietr.hasNext();) { File intFile = ietr.next(); for (File dicFile : dicList) { /* intファイル名取得 */ File f = FileUtil.transPathExtension(dicFile, "int"); //$NON-NLS-1$ if (intFile.equals(f)) { ietr.remove(); } } } /* 残ったファイルを削除 */ for (File f : intList) { if (f.delete()) { delete = true; } } return delete; } /** * コマンド失敗の有無 */ boolean isFailed = false; /** * ファイルを処理するコマンドを実行 * コマンド [引数 ...] ファイル * * @param files * @param cmd * コマンド * @throws ExternalProgramExitException * @throws InterruptedException */ private void exec(File[] files, String... cmd) throws ExternalProgramExitException, InterruptedException { int n_thr = Runtime.getRuntime().availableProcessors(); ExecutorService executor = Executors.newFixedThreadPool(n_thr); final File dicDir = MorphemePreference.getJumanDicDir(); for (final File file : files) { final String[] args = new String[cmd.length + 1]; System.arraycopy(cmd, 0, args, 0, cmd.length); args[cmd.length] = file.getAbsolutePath(); executor.execute(new Runnable() { @Override public void run() { ByteArrayOutputStream out = new ByteArrayOutputStream(); ByteArrayOutputStream err = new ByteArrayOutputStream(); ExternalProgramExec epe = new ExternalProgramExec(); try { monitor.subTask("" + file); //$NON-NLS-1$ epe.exec(args, dicDir, null, out, err); } catch (ExternalProgramExitException e) { isFailed = true; logger.getErrStream().println(args[0] + " " + file); //$NON-NLS-1$ logger.getErrStream().println(Messages.DicCompileExecution_logEndCode + e.process.exitValue()); } catch (Exception e) { logger.getErrStream().println(e + ":" + Arrays.toString(args)); //$NON-NLS-1$ e.printStackTrace(logger.getErrStream()); } sjisConvertWrite(logger.getOutStream(), new ByteArrayInputStream(out.toByteArray())); sjisConvertWrite(logger.getErrStream(), new ByteArrayInputStream(err.toByteArray())); } }); } /* 終了を待つ */ executor.shutdown(); while (!executor.isTerminated()) { Thread.sleep(1); if (monitor.isCanceled()) { // 直ちに終了 // 実行中のタスクは完了されるので、後始末は考慮しない executor.shutdownNow(); throw new InterruptedException(); } } if (isFailed) { throw new ExternalProgramExitException(cmd, null); } } /** * コマンドの出力(Shift_JIS(※MS932)) を変換してコンソールへ出力 * * @param out * @param in */ private static void sjisConvertWrite(PrintStream out, InputStream in) { InputStreamReader isr = null; try { if (MorphemePreference.convSJIS()) { isr = new InputStreamReader(in, Encoding.MS932.toString()); } else { isr = new InputStreamReader(in); } } catch (UnsupportedEncodingException e) { isr = new InputStreamReader(in); } try { OutputStreamWriter osw = new OutputStreamWriter(out); char[] buf = new char[1024]; int n; while ((n = isr.read(buf)) > 0) { String msg = new String(buf, 0, n); osw.write(msg); } osw.flush(); } catch (IOException e) { e.printStackTrace(); } } }