package com.cognitionis.external_tools; import java.io.*; import com.cognitionis.utils_basickit.FileUtils; /** * REGULAR INSTALLATION AND INCLUSION IN PATH REQUIRED * @author Hector Llorens * @since 2011 */ public class CRF { // path is not necessary but is used to capture temporal files (if there are), or default templates public static String program_path = FileUtils.getApplicationPath() + "program-data/CRF++/"; /** * Runs CRF++ over a features file given a template * and saves a model in a .CRFmodel file * * The template file must be in the same path or in program-data/CRF++ or in program-data/CRF++/templates/ * * Format | | | | pipes * * @param filename * @param template * @return Output filename */ public static String train(String featuresfile, String templatefile) { // -p number of processors // -c hyperparameter String outputfile = featuresfile + "." + templatefile.substring(0, templatefile.lastIndexOf('.')) + ".CRFmodel"; try { File tempf = new File(templatefile); if (!tempf.exists() || !tempf.isFile()) { tempf = new File(program_path + templatefile); if (!tempf.exists() || !tempf.isFile()) { tempf = new File(program_path + "templates/" + templatefile); if (!tempf.exists() || !tempf.isFile()) { throw new Exception("Template file (" + templatefile + ") not found."); } else { templatefile = program_path + "templates/" + templatefile; } } else { templatefile = program_path + templatefile; } } // CREATE APROPRIATE INPUT FORMAT String[] command = {"/bin/sh", "-c", "tr \"|\" \" \" < " + featuresfile + " | sed \"s/^[[:blank:]]*\\$//\" > " + program_path + "temp.tmp"}; Process p = Runtime.getRuntime().exec(command); BufferedReader stdInput = new BufferedReader(new InputStreamReader(p.getInputStream())); try { String line; while ((line = stdInput.readLine()) != null) { System.err.println(line); } } finally { if (stdInput != null) { stdInput.close(); } if(p!=null){ p.getInputStream().close(); p.getOutputStream().close(); p.getErrorStream().close(); p.destroy(); } } String[] command2 = {"crf_learn", "-c", "1.0", "-p", "2", templatefile, program_path + "temp.tmp", outputfile}; //System.err.println("\ncrf_learn -c 1.0 -p 2 " + templatefile + " " + featuresfile+" "+outputfile+"\n"); p = Runtime.getRuntime().exec(command2); stdInput = new BufferedReader(new InputStreamReader(p.getInputStream())); try { String line; while ((line = stdInput.readLine()) != null) { System.err.println(line); } } finally { if (stdInput != null) { stdInput.close(); } if(p!=null){ p.getInputStream().close(); p.getOutputStream().close(); p.getErrorStream().close(); p.destroy(); } } // CLEARTMP String[] command3 = {"/bin/sh", "-c", "rm -rf " + program_path + "*.tmp"}; p = Runtime.getRuntime().exec(command3); stdInput = new BufferedReader(new InputStreamReader(p.getInputStream())); try { String line; while ((line = stdInput.readLine()) != null) { System.err.println(line); } } finally { if (stdInput != null) { stdInput.close(); } if(p!=null){ p.getInputStream().close(); p.getOutputStream().close(); p.getErrorStream().close(); p.destroy(); } } } catch (Exception e) { System.err.println("Errors found (CRF++):\n\t" + e.toString()); if (System.getProperty("DEBUG") != null && System.getProperty("DEBUG").equalsIgnoreCase("true")) { e.printStackTrace(System.err); System.exit(1); } return null; } return outputfile; } /** * Runs CRF++ over a features file given a model * and saves the output as input-annotatedWith-CRFmodel-x file * * The model must be in the same path or in program-data/CRF++ * * Format | | | | pipes * * @param filename * @param template * @return Output filename */ public static String test(String featuresfile, String modelfile) { int folderposition = modelfile.lastIndexOf('/'); String outputfile = featuresfile + "-annotatedWith-CRFmodel-" + modelfile.substring(folderposition + 1, modelfile.lastIndexOf('.')); try { File tempf = new File(modelfile); if (!tempf.exists() || !tempf.isFile()) { tempf = new File(program_path + modelfile); if (!tempf.exists() || !tempf.isFile()) { tempf = new File(program_path + "models/" + modelfile); if (!tempf.exists() || !tempf.isFile()) { throw new Exception("Template file (" + modelfile + ") not found."); } else { modelfile = program_path + "models/" + modelfile; } } else { modelfile = program_path + modelfile; } } // CREATE APROPRIATE INPUT FORMAT String[] command = {"/bin/sh", "-c", "tr \"|\" \" \" < " + featuresfile + " | sed \"s/^[[:blank:]]*\\$//\" > " + program_path + "temp.tmp"}; Process p = Runtime.getRuntime().exec(command); BufferedReader stdInput = new BufferedReader(new InputStreamReader(p.getInputStream())); try { String line; while ((line = stdInput.readLine()) != null) { System.err.println(line); } } finally { if (stdInput != null) { stdInput.close(); } if(p!=null){ p.getInputStream().close(); p.getOutputStream().close(); p.getErrorStream().close(); p.destroy(); } } // EXECUTE CRF MODEL String[] command2 = {"crf_test", "-m", modelfile, program_path + "temp.tmp", "-o", program_path + "temp2.tmp"}; p = Runtime.getRuntime().exec(command2); stdInput = new BufferedReader(new InputStreamReader(p.getInputStream())); try { String line; while ((line = stdInput.readLine()) != null) { System.err.println(line); } } finally { if (stdInput != null) { stdInput.close(); } if(p!=null){ p.getInputStream().close(); p.getOutputStream().close(); p.getErrorStream().close(); p.destroy(); } } // CREATE APPROPRIATE OUTPUT FORMAT String[] command3 = {"/bin/sh", "-c", "tr \"\t\" \"|\" < " + program_path + "temp2.tmp" + " | sed '/^[[:blank:]]*$/d' >" + outputfile}; p = Runtime.getRuntime().exec(command3); stdInput = new BufferedReader(new InputStreamReader(p.getInputStream())); try { String line; while ((line = stdInput.readLine()) != null) { System.err.println(line); } } finally { if (stdInput != null) { stdInput.close(); } if(p!=null){ p.getInputStream().close(); p.getOutputStream().close(); p.getErrorStream().close(); p.destroy(); } } // CLEARTMP String[] command4 = {"/bin/sh", "-c", "rm -rf " + program_path + "*.tmp"}; p = Runtime.getRuntime().exec(command4); stdInput = new BufferedReader(new InputStreamReader(p.getInputStream())); try { String line; while ((line = stdInput.readLine()) != null) { System.err.println(line); } } finally { if (stdInput != null) { stdInput.close(); } if(p!=null){ p.getInputStream().close(); p.getOutputStream().close(); p.getErrorStream().close(); p.destroy(); } } } catch (Exception e) { System.err.println("Errors found (CRF++):\n\t" + e.toString()); if (System.getProperty("DEBUG") != null && System.getProperty("DEBUG").equalsIgnoreCase("true")) { e.printStackTrace(System.err); System.exit(1); } return null; } return outputfile; } }