/*
* Copyright (c) 2015 University of Illinois Board of Trustees, All rights reserved.
* Developed at GSLIS/ the iSchool, by Dr. Jana Diesner, Amirhossein Aleyasen,
* Chieh-Li Chin, Shubhanshu Mishra, Kiumars Soltani, and Liang Tao.
*
* This program is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation; either version 2 of the License, or any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, see <http://www.gnu.org/licenses>.
*
*/
package context.core.task.lexisnexis;
import context.core.entity.CTask;
import context.core.entity.GenericTask;
import context.core.entity.TaskInstance;
import javafx.beans.property.DoubleProperty;
import javafx.beans.property.StringProperty;
/**
 * Task that drives the LexisNexis parse-and-deduplicate step through an
 * {@code LxNxDataProvider} and reports progress on the supplied
 * {@link GenericTask}.
 *
 * @author Amirhossein Aleyasen <aleyase2@illinois.edu>
 */
public class LexisNexisParseTask extends CTask {

    /**
     * Creates the task; both properties are handed unchanged to the
     * {@code CTask} constructor.
     *
     * @param progress property passed to the superclass constructor
     * @param progressMessage property passed to the superclass constructor
     */
    public LexisNexisParseTask(DoubleProperty progress, StringProperty progressMessage) {
        super(progress, progressMessage);
    }

    /**
     * Configures an {@code LxNxDataProvider} with output locations derived from
     * the instance's text-output path and invokes its
     * {@code parseAndDeduplicate} method on the instance's input path.
     * <p>
     * Every output location is the text-output path followed by {@code "//"}
     * and a fixed file or directory name. Progress is reported as 10 of 100
     * before the work starts and 100 of 100 once the provider call returns.
     *
     * @param instance the task instance; it is cast to
     * {@code LexisNexisParseTaskInstance}, so any other type fails with a
     * {@link ClassCastException}
     * @param task receives the progress values and progress messages
     * @return the same {@code instance} object that was passed in
     */
    @Override
    public TaskInstance run(TaskInstance instance, GenericTask task) {
        info("LexisNexis network generation:");
        final LexisNexisParseTaskInstance parseInstance = (LexisNexisParseTaskInstance) instance;

        info("Parsing and deduplication...");
        task.progressMessage("parsing...");
        task.progress(10, 100);

        // Common prefix shared by every output file and directory below.
        final String base = parseInstance.getTextOutput().getPath().get() + "//";

        final LxNxDataProvider provider = new LxNxDataProvider();
        provider.setTextBodyDuplicateDirectory(base + "duplicateFiles-onlyText");
        provider.setTextBodyToDirectory(base + "splitFiles-onlyText");
        provider.setTextBodyUniqueDirectory(base + "uniqueFiles-onlyText");
        provider.setUniqueAllTextFile(base + "AllUniqueText.txt");

        provider.parseAndDeduplicate(
                parseInstance.getInput().getPath().get(),
                base + "splitFiles",
                base + "uniqueFiles",
                base + "duplicateFiles",
                base + "metadata.xls",
                base + "uniqueList.xls",
                base + "duplicateList.xls");

        task.progress(100, 100);
        task.progressMessage("Done.");
        info("LexisNexis network generation Done.");
        return instance;
    }
}