package eu.fbk.knowledgestore.populator.naf;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.Hashtable;
import java.util.LinkedList;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import javax.xml.bind.JAXBException;
import org.apache.commons.compress.archivers.tar.*;
import org.apache.commons.compress.compressors.gzip.*;
import org.apache.commons.compress.utils.IOUtils;
import org.slf4j.Logger;
public class NAFRunner {
/**
 * Runs the population job for the configured input and then marks the job as finished.
 *
 * <p>{@code nafPopulator.INpath} is interpreted according to the {@code nafPopulator} flags:
 * <ul>
 * <li>{@code FInFile}: a text file listing one path per line;</li>
 * <li>{@code ZInFile}: a zip archive of files to process;</li>
 * <li>{@code TInFile}: a gzip-compressed tar archive of files to process;</li>
 * <li>otherwise: a single file or a directory.</li>
 * </ul>
 *
 * <p>Any exception raised while processing is printed and logged here instead of being
 * propagated, and {@code nafPopulator.JobFinished} is set to {@code true} in every case.
 */
void generate(){
    try {
        if (nafPopulator.FInFile) {
            processPathListFile();
        } else if (nafPopulator.ZInFile) {
            processZipArchive();
        } else if (nafPopulator.TInFile) {
            processTarGzArchive();
        } else {
            // input is either a single file or a directory
            analyzePathAndRunSystem(nafPopulator.INpath, nafPopulator.disabledItems, nafPopulator.recursion);
        }
    } catch(Exception e) {
        e.printStackTrace();
        nafPopulator.logger.error(nafPopulator.INpath + " Processing phase: file discarded!\n");
    }
    nafPopulator.JobFinished=true;
}

/**
 * Reads {@code nafPopulator.INpath} as a UTF-8 list of paths (one per line) and hands the
 * existing ones to {@link #RunSystemOnList} in groups. Paths that do not exist are reported
 * on {@code System.err} and skipped. The input stream is closed even when processing fails.
 */
private void processPathListFile()
        throws JAXBException, IOException, InstantiationException, IllegalAccessException,
        NoSuchMethodException, ClassNotFoundException, InterruptedException {
    FileInputStream in = new FileInputStream(nafPopulator.INpath);
    try {
        BufferedReader br = new BufferedReader(new InputStreamReader(in, "utf8"));
        LinkedList<File> fileslist = new LinkedList<File>();
        String line;
        while ((line = br.readLine()) != null) {
            // The pending list is handed over before the current line is added.
            // NOTE(review): with batchSize == -1 this test is always true, so the list is
            // handed over on every line (the first time while still empty) - confirm intent.
            if (fileslist.size() >= nafPopulator.batchSize) {
                RunSystemOnList(fileslist, nafPopulator.disabledItems, nafPopulator.recursion);
                fileslist.clear();
            }
            File e = new File(line);
            if (e.exists()) {
                fileslist.addLast(e);
            } else {
                System.err.println("Path not exist!" + e.getPath());
            }
        }
        if (fileslist.size() > 0) {
            RunSystemOnList(fileslist, nafPopulator.disabledItems, nafPopulator.recursion);
            fileslist.clear();
        }
    } finally {
        // closing the underlying stream releases the file handle held by the readers too
        in.close();
    }
}

/**
 * Extracts every non-directory entry of the zip archive at {@code nafPopulator.INpath} into
 * a fixed temporary directory and processes each extracted file. The archive stream and the
 * per-entry output stream are closed even when extraction or processing fails.
 */
private void processZipArchive()
        throws JAXBException, IOException, InstantiationException, IllegalAccessException,
        NoSuchMethodException, ClassNotFoundException, InterruptedException {
    String ZIP_OUTPUT_DIR = "/tmp/nafPopulatorZipOutDir";
    byte[] buffer = new byte[1024];
    LinkedList<File> fileslist = new LinkedList<File>();
    boolean multipleFileFlag = (nafPopulator.batchSize > 1);
    // create the output directory if it does not exist
    File zipDir = new File(ZIP_OUTPUT_DIR);
    if (!zipDir.exists()) {
        zipDir.mkdir();
    }
    ZipInputStream zis = new ZipInputStream(new FileInputStream(nafPopulator.INpath));
    try {
        ZipEntry ze = zis.getNextEntry();
        while (ze != null) {
            // directory entries are skipped; only file contents are copied
            if (!ze.isDirectory()) {
                // Only the base name of the entry is used, so every file lands directly in
                // the output directory; entries sharing a base name overwrite each other.
                File tmpFile = new File(ZIP_OUTPUT_DIR + File.separator + ze.getName());
                File extractedFile = new File(ZIP_OUTPUT_DIR + File.separator + tmpFile.getName());
                FileOutputStream fos = new FileOutputStream(extractedFile);
                try {
                    int len;
                    while ((len = zis.read(buffer)) > 0) {
                        fos.write(buffer, 0, len);
                    }
                } finally {
                    fos.close();
                }
                submitExtractedFile(extractedFile, fileslist, multipleFileFlag);
            }
            // close entry and get a new one
            zis.closeEntry();
            ze = zis.getNextEntry();
        }
        // process whatever is still pending in the list
        if (multipleFileFlag && (fileslist.size() > 0)) {
            RunSystemOnList(fileslist, nafPopulator.disabledItems, nafPopulator.recursion);
            fileslist.clear();
        }
    } finally {
        zis.close();
    }
}

/**
 * Extracts every non-directory entry of the gzip-compressed tar archive at
 * {@code nafPopulator.INpath} into a fixed temporary directory and processes each extracted
 * file. All streams are closed even when decompression, extraction or processing fails.
 */
private void processTarGzArchive()
        throws JAXBException, IOException, InstantiationException, IllegalAccessException,
        NoSuchMethodException, ClassNotFoundException, InterruptedException {
    String TAR_OUTPUT_DIR = "/tmp/nafPopulatorTarOutDir";
    LinkedList<File> fileslist = new LinkedList<File>();
    boolean multipleFileFlag = (nafPopulator.batchSize > 1);
    // create the output directory if it does not exist
    File tgzDir = new File(TAR_OUTPUT_DIR);
    if (!tgzDir.exists()) {
        tgzDir.mkdir();
    }
    FileInputStream rawIn = new FileInputStream(nafPopulator.INpath);
    TarArchiveInputStream is = null;
    try {
        is = new TarArchiveInputStream(new GzipCompressorInputStream(rawIn));
        TarArchiveEntry te = (TarArchiveEntry)is.getNextEntry();
        while (te != null) {
            // directory entries are skipped; only file contents are copied
            if (!te.isDirectory()) {
                // Only the base name of the entry is used, so every file lands directly in
                // the output directory; entries sharing a base name overwrite each other.
                File tmpFile = new File(TAR_OUTPUT_DIR + File.separator + te.getName());
                File extractedFile = new File(TAR_OUTPUT_DIR + File.separator + tmpFile.getName());
                OutputStream outputFileStream = new FileOutputStream(extractedFile);
                try {
                    IOUtils.copy(is, outputFileStream);
                } finally {
                    outputFileStream.close();
                }
                submitExtractedFile(extractedFile, fileslist, multipleFileFlag);
            }
            // get a new entry
            te = (TarArchiveEntry)is.getNextEntry();
        }
        // process whatever is still pending in the list
        if (multipleFileFlag && (fileslist.size() > 0)) {
            RunSystemOnList(fileslist, nafPopulator.disabledItems, nafPopulator.recursion);
            fileslist.clear();
        }
    } finally {
        // the wrapper may never have been created if the gzip header could not be read
        if (is != null) {
            is.close();
        } else {
            rawIn.close();
        }
    }
}

/**
 * Processes one file extracted from an archive: when batching is on, the file is appended
 * to {@code fileslist} (after handing over a list that has reached the batch size);
 * otherwise the file is processed immediately.
 */
private void submitExtractedFile(File extractedFile, LinkedList<File> fileslist, boolean multipleFileFlag)
        throws JAXBException, IOException, InstantiationException, IllegalAccessException,
        NoSuchMethodException, ClassNotFoundException, InterruptedException {
    if (multipleFileFlag) {
        if (fileslist.size() >= nafPopulator.batchSize) {
            RunSystemOnList(fileslist, nafPopulator.disabledItems, nafPopulator.recursion);
            fileslist.clear();
        }
        fileslist.addLast(extractedFile);
    } else {
        analyzePathAndRunSystem(extractedFile.getAbsolutePath(), nafPopulator.disabledItems, nafPopulator.recursion);
    }
}
/**
 * Processes every path in {@code fileslist} and submits the collected results.
 *
 * <p>Plain files are passed to {@link #runClass}; for a directory, each contained file is
 * passed to {@link #runClass} and each contained sub-directory to
 * {@link #analyzePathAndRunSystem}. After each processed item
 * {@link #checkAddOrSubmit} decides whether the results gathered so far are submitted;
 * whatever is still pending at the end is submitted through {@link #addAndFreeMemory}.
 * Paths that are neither an existing file nor an existing directory are ignored.
 *
 * @param fileslist paths to process
 * @param disabledItems forwarded unchanged to the processing calls
 * @param rec forwarded unchanged to {@link #analyzePathAndRunSystem}
 */
private void RunSystemOnList(LinkedList<File> fileslist, String disabledItems, boolean rec)
        throws JAXBException, IOException, InstantiationException, IllegalAccessException,
        NoSuchMethodException, SecurityException, ClassNotFoundException, InterruptedException {
    Hashtable<String, KSPresentation> mentions = new Hashtable<String, KSPresentation>();
    // true when the most recent results have already been handed over
    boolean submittedFlag = false;
    for (File filePath : fileslist) {
        if (filePath.isDirectory()) {
            File[] listOfFiles = filePath.listFiles();
            if (listOfFiles == null) {
                // listFiles() returns null when the directory cannot be read
                System.err.println("Cannot list directory: " + filePath.getPath());
                listOfFiles = new File[0];
            }
            for (File child : listOfFiles) {
                if (child.isFile()) {
                    runClass(child.getPath(), disabledItems, mentions);
                } else if (child.isDirectory()) {
                    analyzePathAndRunSystem(child.getPath(), disabledItems, rec);
                }
                nafPopulator.out.flush();
                submittedFlag = checkAddOrSubmit(mentions);
                if (submittedFlag) {
                    // later results must not be added to a batch that was already handed over
                    mentions = new Hashtable<String, KSPresentation>();
                }
            }
            if ((nafPopulator.batchSize == -1) && (! submittedFlag)) {
                addAndFreeMemory(mentions);
                mentions = new Hashtable<String, KSPresentation>();
                // prevents the final submit below from queueing this directory's results again
                submittedFlag = true;
            }
        } else if (filePath.isFile()) {
            runClass(filePath.getPath(), disabledItems, mentions);
            submittedFlag = checkAddOrSubmit(mentions);
            if (submittedFlag) {
                mentions = new Hashtable<String, KSPresentation>();
            }
        }
    }
    if (! submittedFlag) {
        addAndFreeMemory(mentions);
    }
    nafPopulator.out.flush();
    if (nafPopulator.printToFile && (nafPopulator.mentionFile != null)) {
        nafPopulator.mentionFile.flush();
    }
}
/**
 * Submits {@code mentions} through {@link #addAndFreeMemory} when batching is active and
 * the table size is a multiple of {@code nafPopulator.batchSize}.
 *
 * <p>Batching is considered active only for a positive batch size: the sentinel {@code -1}
 * never triggers a submit here, and a batch size of {@code 0} no longer causes a division
 * by zero. An empty table also satisfies the multiple-of test and is therefore submitted.
 *
 * @param mentions results collected so far
 * @return true if mentions have been submitted on the queue
 * @throws InterruptedException if interrupted while handing the table over
 */
boolean checkAddOrSubmit(Hashtable<String, KSPresentation> mentions) throws InterruptedException{
    if (nafPopulator.batchSize <= 0) {
        return false;
    }
    if ((mentions.size() % nafPopulator.batchSize) != 0) {
        return false;
    }
    addAndFreeMemory(mentions);
    return true;
}
/**
 * Puts the collected results on {@code Producer.queue} and empties the caller's table.
 *
 * <p>A copy of {@code mentions} is queued and the passed table is then cleared. Reassigning
 * the parameter (as was done before) had no effect on the caller, which kept filling - and
 * re-submitting - the very table that was already on the queue.
 *
 * @param mentions results to submit; empty when this method returns
 * @throws InterruptedException if interrupted while waiting to put on the queue
 */
void addAndFreeMemory(Hashtable<String, KSPresentation> mentions) throws InterruptedException{
    // the queued snapshot is independent from the table the caller keeps using
    Producer.queue.put(new Hashtable<String, KSPresentation>(mentions));
    mentions.clear();
    // request a collection now that the batch has been handed over;
    // System.gc() and Runtime.getRuntime().gc() are the same request, so one call suffices
    System.gc();
}
/**
 * Processes a single path and submits the collected results to {@code Producer.queue}.
 *
 * <p>For a directory, every contained file is passed to {@link #runClass} and every
 * contained sub-directory is processed by a recursive call; results are submitted whenever
 * their count is a multiple of a positive {@code nafPopulator.batchSize}, and once more
 * after the last entry if anything is pending. For a plain file, the file is processed and
 * its result submitted immediately. A path that does not exist is reported on
 * {@code System.err}.
 *
 * @param path file or directory to process
 * @param disabledItems forwarded unchanged to {@link #runClass}
 * @param rec forwarded to the recursive calls; it is not consulted here, so sub-directories
 *        are always descended into
 */
private void analyzePathAndRunSystem(String path, String disabledItems, boolean rec)
        throws JAXBException, IOException, InstantiationException, IllegalAccessException,
        NoSuchMethodException, SecurityException, ClassNotFoundException, InterruptedException {
    File filePath = new File(path);
    if (!filePath.exists()) {
        System.err.println("Path not exist!" + filePath.getPath());
        return;
    }
    if (filePath.isDirectory()) {
        File[] listOfFiles = filePath.listFiles();
        if (listOfFiles == null) {
            // listFiles() returns null when the directory cannot be read
            System.err.println("Cannot list directory: " + filePath.getPath());
            listOfFiles = new File[0];
        }
        Hashtable<String, KSPresentation> mentions = new Hashtable<String, KSPresentation>();
        for (File child : listOfFiles) {
            if (child.isFile()) {
                runClass(child.getPath(), disabledItems, mentions);
            } else if (child.isDirectory()) {
                analyzePathAndRunSystem(child.getPath(), disabledItems, rec);
            }
            // only a positive batch size triggers intermediate submits; this also keeps a
            // batch size of 0 from causing a division by zero
            if (nafPopulator.batchSize > 0 && mentions.size() % nafPopulator.batchSize == 0) {
                mentions = submitAndRenew(mentions);
            }
        }
        if (mentions.size() > 0) {
            mentions = submitAndRenew(mentions);
        }
        // NOTE(review): at this point the table is always empty, so with batchSize == -1 an
        // extra empty table is queued; kept as-is because the queue consumer is not visible
        // here - confirm whether it is needed.
        if (nafPopulator.batchSize == -1) {
            mentions = submitAndRenew(mentions);
        }
    } else if (filePath.isFile()) {
        Hashtable<String, KSPresentation> mentions = new Hashtable<String, KSPresentation>();
        runClass(filePath.getPath(), disabledItems, mentions);
        submitAndRenew(mentions);
    }
    if (nafPopulator.printToFile && (nafPopulator.mentionFile != null)) {
        nafPopulator.mentionFile.flush();
    }
}

/**
 * Puts {@code batch} on {@code Producer.queue}, requests a garbage collection and returns a
 * new empty table for the caller to continue with.
 */
private Hashtable<String, KSPresentation> submitAndRenew(Hashtable<String, KSPresentation> batch)
        throws InterruptedException {
    Producer.queue.put(batch);
    // System.gc() and Runtime.getRuntime().gc() are the same request, so one call suffices
    System.gc();
    return new Hashtable<String, KSPresentation>();
}
/**
 * Processes one file through the reflectively loaded {@code processNAF} class.
 *
 * <p>The class is looked up by name, instantiated, and its
 * {@code init(String, Writer, String, boolean)} method is invoked with the file path,
 * {@code nafPopulator.out}, {@code disabledItems} and {@code nafPopulator.store_partial_info}.
 * A non-null result is stored in {@code mentions} under {@code path}; a null result is
 * noted on {@code nafPopulator.out}. Failures of the reflective call itself are printed
 * and logged, and the file is skipped.
 *
 * @param path path of the file to process; also used as the key in {@code mentions}
 * @param disabledItems forwarded unchanged to {@code init}
 * @param mentions table receiving the result of this file
 */
public void runClass(String path, String disabledItems, Hashtable<String, KSPresentation> mentions) throws InstantiationException,
        IllegalAccessException, NoSuchMethodException, SecurityException,
        ClassNotFoundException, IOException {
    System.out.println(path); // TODO
    Class<?> processorType = Class.forName("eu.fbk.knowledgestore.populator.naf.processNAF");
    Method initMethod = processorType.getMethod("init", String.class, Writer.class, String.class, boolean.class);
    Object processor = processorType.newInstance();
    try {
        KSPresentation outcome = (KSPresentation) initMethod.invoke(
                processor, path, nafPopulator.out, disabledItems, nafPopulator.store_partial_info);
        if (outcome == null) {
            nafPopulator.out.append(path + " null is returned from processNAF procedure!");
        } else {
            mentions.put(path, outcome);
        }
    } catch (IllegalAccessException failure) {
        reportDiscarded(path, failure);
    } catch (IllegalArgumentException failure) {
        reportDiscarded(path, failure);
    } catch (InvocationTargetException failure) {
        reportDiscarded(path, failure);
    }
}

/** Prints the stack trace of {@code failure} and logs that the file was discarded. */
private void reportDiscarded(String path, Exception failure) {
    failure.printStackTrace();
    nafPopulator.logger.error(path + " Processing phase: file discarded!\n");
}
}