package rainbownlp.core;

import java.util.List;

import rainbownlp.analyzer.evaluation.ICrossfoldValidator;
import rainbownlp.analyzer.evaluation.IEvaluationResult;
import rainbownlp.analyzer.evaluation.classification.Evaluator;
import rainbownlp.machinelearning.IMLExampleBuilder;
import rainbownlp.machinelearning.LearnerEngine;
import rainbownlp.machinelearning.MLExample;
import rainbownlp.preprocess.DocumentAnalyzer.InputType;
import rainbownlp.preprocess.SimpleDocumentLoader;

/**
 * Collection of convenience ("sugar") methods that make RNLP easier to use.
 *
 * <p>Every step stores its intermediate result in a field of this object:
 * the documents loaded by {@link #readInput}, and the train / test examples
 * obtained from the example builder.
 *
 * @author eemadzadeh
 */
public class RainbowEngine {

	/** Identifies the dataset split that documents and examples belong to. */
	public enum DatasetType {
		TRAIN_SET,
		TEST_SET
	}

	/** Documents returned by the most recent successful load; null until then. */
	List<Artifact> documentsInPipe;

	/** Examples most recently requested for the train set; null until then. */
	List<MLExample> trainExamplesInPipe;

	/** Examples most recently requested for the test set; null until then. */
	List<MLExample> testExamplesInPipe;

	/**
	 * Loads the input data found under the given root path.
	 *
	 * <p>Only {@code InputType.TextFiles} is handled; for any other input type
	 * nothing is loaded and the previously loaded documents are left untouched.
	 *
	 * @param inputRootPath root path handed to the document loader
	 * @param inputType format of the input data
	 * @param datasetType dataset split the loader is configured with
	 * @return this engine, so that calls can be chained
	 */
	public RainbowEngine readInput(String inputRootPath, InputType inputType, DatasetType datasetType) {
		switch (inputType) {
		case TextFiles:
			documentsInPipe = loadTextDocuments(inputRootPath, datasetType);
			break;
		default:
			// Unsupported input types are silently ignored.
			break;
		}
		return this;
	}

	/**
	 * Requests the train-set examples from the builder, keeps them in this
	 * engine and trains the learner on them.
	 *
	 * <p>NOTE(review): the builder is presumably expected to work on the
	 * documents loaded by {@link #readInput}; this class only passes it the
	 * name of the train set - confirm against the builder implementation.
	 *
	 * @param exampleBuilder source of the train-set examples
	 * @param learner learner to be trained
	 * @return this engine, so that calls can be chained
	 * @throws Exception declared by this method; presumably propagated from
	 *         the builder or the learner
	 */
	public RainbowEngine train(IMLExampleBuilder exampleBuilder, LearnerEngine learner) throws Exception {
		trainExamplesInPipe = examplesFor(exampleBuilder, DatasetType.TRAIN_SET);
		learner.train(trainExamplesInPipe);
		return this;
	}

	/**
	 * Requests the test-set examples from the builder, keeps them in this
	 * engine, runs the learner on them and evaluates the outcome.
	 *
	 * <p>NOTE(review): the builder is presumably expected to work on the
	 * documents loaded by {@link #readInput}; this class only passes it the
	 * name of the test set - confirm against the builder implementation.
	 *
	 * @param exampleBuilder source of the test-set examples
	 * @param learner learner whose {@code test} method is invoked
	 * @return evaluation result computed by {@link Evaluator} from the test examples
	 * @throws Exception declared by this method; presumably propagated from
	 *         the builder, the learner or the evaluator
	 */
	public IEvaluationResult test(IMLExampleBuilder exampleBuilder, LearnerEngine learner) throws Exception {
		testExamplesInPipe = examplesFor(exampleBuilder, DatasetType.TEST_SET);
		learner.test(testExamplesInPipe);
		return Evaluator.getEvaluationResult(testExamplesInPipe);
	}

	/**
	 * Performs cross-fold validation on the train set.
	 *
	 * <p>Train examples already held by this engine are reused; otherwise they
	 * are requested from the builder first and kept for later calls.
	 *
	 * @param cfValidator validator that carries out the cross validation
	 * @param exampleBuilder source of the train-set examples when none are held yet
	 * @param folds number of folds handed to the validator
	 * @return result reported by the validator
	 * @throws Exception declared by this method; presumably propagated from
	 *         the builder or the validator
	 */
	public IEvaluationResult crossValidate(ICrossfoldValidator cfValidator, IMLExampleBuilder exampleBuilder,
			int folds) throws Exception {
		if (trainExamplesInPipe == null) {
			trainExamplesInPipe = examplesFor(exampleBuilder, DatasetType.TRAIN_SET);
		}
		return cfValidator.crossValidation(trainExamplesInPipe, folds);
	}

	/** Configures a fresh text-file loader for the dataset type and returns the documents it processes. */
	private List<Artifact> loadTextDocuments(String inputRootPath, DatasetType datasetType) {
		SimpleDocumentLoader textLoader = new SimpleDocumentLoader();
		textLoader.setDatasetType(datasetType);
		return textLoader.processDocuments(inputRootPath);
	}

	/** Asks the builder for the examples registered under the name of the given dataset type. */
	private List<MLExample> examplesFor(IMLExampleBuilder exampleBuilder, DatasetType datasetType) throws Exception {
		return exampleBuilder.getExamples(datasetType.name());
	}
}