package com.maalaang.omtwitter.tools; import java.io.FileInputStream; import java.io.InputStreamReader; import java.util.Properties; import org.apache.log4j.Level; import com.maalaang.omtwitter.io.LogSystemStream; import com.maalaang.omtwitter.io.OMTwitterCorpusFile; import com.maalaang.omtwitter.ml.CrfClassifier; public class TrainCrfNamedEntityRecognizer { public final static void main(String[] args) { try { Properties prop = new Properties(); prop.load(new InputStreamReader(new FileInputStream(args[0]), "UTF-8")); LogSystemStream.redirectErrToLog(Level.ERROR); CrfClassifier crf = new CrfClassifier(); String[] trainingFiles = new String[1]; trainingFiles[0] = prop.getProperty("ne.corpus.file"); crf.train(trainingFiles, prop.getProperty("ne.corpus.fields.delim"), OMTwitterCorpusFile.fieldNameToId(prop.getProperty("ne.corpus.fields"), "\\s+"), prop.getProperty("crf.model"), prop.getProperty("crf.feature.dump.file"), Boolean.parseBoolean(prop.getProperty("crf.feature.dump")), 3); } catch (Exception e) { e.printStackTrace(); } } }