package eu.fbk.knowledgestore.populator.naf;

import eu.fbk.knowledgestore.KnowledgeStore;
import eu.fbk.knowledgestore.OperationException;
import eu.fbk.knowledgestore.Session;
import eu.fbk.knowledgestore.client.Client;
import eu.fbk.knowledgestore.populator.naf.connection.KnowledgestoreServer;
import org.apache.commons.cli.*;
import org.openrdf.model.impl.URIImpl;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBException;
import javax.xml.bind.Unmarshaller;
import java.io.*;
import java.net.URL;
import java.util.Hashtable;
import java.util.LinkedList;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;

/**
 * Command line entry point of the NAF populator.
 *
 * <p>{@link #main(String[])} parses the command line, optionally opens the report/records
 * output files and the KnowledgeStore session, then starts one {@link Producer} thread,
 * {@code consumer_threads} {@link Consumer} threads sharing a bounded queue, and one
 * {@code finalizeThread}. All configuration is kept in static fields of this class.</p>
 */
public class nafPopulator {

    /** Default 1=discard the new, 2=ignore repopulate, 3=delete repopulate. */
    public static int KSresourceReplacement = 1;

    static statistics globalStats = new statistics();
    static Writer out, mentionFile;
    static int batchSize = 1, consumer_threads = 1;
    static String disabledItems = "", reportFileName = "report.txt", mentionsF = "records.txt";
    static boolean recursion = false, printToFile = false, JobFinished = false;
    static boolean store_partial_info = false;
    /** True when the input (-f) is a file containing paths of NAFs. */
    static boolean FInFile = false;
    /** True when the input (-z) is a zip archive containing NAF files. */
    static boolean ZInFile = false;
    /** True when the input (-t) is a compressed tar archive containing NAF files. */
    static boolean TInFile = false;
    static String INpath = "";
    private static String SERVER_URL = "";
    static String USERNAME = "";
    static String PASSWORD = "";
    static Session session = null;
    static KnowledgeStore store = null;
    static String populatorVersion = "V0.1";
    static Logger logger = LoggerFactory.getLogger(nafPopulator.class);
    static LinkedList<Thread> threads = new LinkedList<Thread>();
    /** Bounded queue shared by producer and consumers; created in main (capacity -qs, default 2). */
    static BlockingQueue<Hashtable<String, KSPresentation>> queue;
    static Producer producer;
    static Consumer consumer;

    /** Short names of the mutually exclusive options that select the input. */
    private static final String[] INPUT_OPTIONS = {"n", "d", "f", "z", "t"};

    /**
     * Parses the command line and starts the producer, consumer and finalize threads.
     *
     * <p>Syntax errors (including non-integer or out-of-range numeric options) are reported on
     * standard error followed by the usage text; any other failure is reported with its stack
     * trace followed by the usage text. Neither is propagated to the caller.</p>
     *
     * @param args the command line arguments
     * @throws JAXBException declared for backward compatibility
     * @throws IOException declared for backward compatibility
     * @throws InstantiationException declared for backward compatibility
     * @throws IllegalAccessException declared for backward compatibility
     * @throws NoSuchMethodException declared for backward compatibility
     * @throws SecurityException declared for backward compatibility
     * @throws ClassNotFoundException declared for backward compatibility
     */
    public static void main(String[] args) throws JAXBException, IOException,
            InstantiationException, IllegalAccessException, NoSuchMethodException,
            SecurityException, ClassNotFoundException {
        init();
        final Options options = buildOptions();
        try {
            final CommandLine cmd = new GnuParser().parse(options, args);

            KSresourceReplacement =
                    intOption(cmd, "ksm", KSresourceReplacement, Integer.MIN_VALUE);

            // Exactly one input modality may be given; -t is counted like the others.
            int inputCount = 0;
            for (String option : INPUT_OPTIONS) {
                if (cmd.hasOption(option)) {
                    inputCount++;
                }
            }
            if (inputCount > 1) {
                System.err.println(
                        "Cannot manage multiple options(-n|-d|-f|-z|-t): please choose one of them.");
                printUsage(options);
                System.exit(0);
            }

            // Informational exits come before any side effect (opening/truncating output
            // files, creating the queue and the workers).
            if (cmd.hasOption("v")) {
                System.out.println("KnowledgeStore.populator.version: " + populatorVersion);
                System.exit(0);
            }
            if (cmd.hasOption("h") || inputCount == 0) {
                printUsage(options);
                System.exit(0);
            }

            if (cmd.hasOption("u")) {
                SERVER_URL = cmd.getOptionValue("u");
            }
            // With no consumer thread nothing would ever drain the bounded queue.
            consumer_threads = intOption(cmd, "ct", consumer_threads, 1);
            queue = new ArrayBlockingQueue<>(intOption(cmd, "qs", 2, 1));
            producer = new Producer(queue);
            consumer = new Consumer(queue);

            if (cmd.hasOption("o")) {
                reportFileName = resolveOutputPath(cmd.getOptionValue("o"), "report.txt");
                out = openUtf8Writer(reportFileName);
            }
            if (cmd.hasOption("or")) {
                mentionsF = resolveOutputPath(cmd.getOptionValue("or"), "records.txt");
                mentionFile = openUtf8Writer(mentionsF);
            }
            if (cmd.hasOption("p")) {
                printToFile = true;
            }
            if (cmd.hasOption("spi")) {
                store_partial_info = true;
            }
            batchSize = intOption(cmd, "b", 1, Integer.MIN_VALUE);
            if (cmd.hasOption("x")) {
                disabledItems = cmd.getOptionValue("x");
            }
            if (cmd.hasOption("r")) {
                recursion = true;
            }

            // The KS connection is only needed when data is actually going to be stored.
            if (!printToFile) {
                readConnectionFile();
            }

            if (cmd.hasOption("n")) {
                INpath = cmd.getOptionValue("n");
            } else if (cmd.hasOption("d")) {
                INpath = cmd.getOptionValue("d");
            } else if (cmd.hasOption("f")) {
                FInFile = true;
                INpath = cmd.getOptionValue("f");
            } else if (cmd.hasOption("z")) {
                ZInFile = true;
                INpath = cmd.getOptionValue("z");
            } else if (cmd.hasOption("t")) {
                TInFile = true;
                INpath = cmd.getOptionValue("t");
            }

            // Start the producer, then the consumers (tracked in 'threads'), then the finalizer.
            new Thread(producer).start();
            for (int i = 0; i < consumer_threads; i++) {
                final Thread worker = new Thread(consumer);
                worker.start();
                threads.addLast(worker);
            }
            new Thread(new finalizeThread()).start();

        } catch (final ParseException ex) {
            // Display error message and then usage on syntax error
            System.err.println("SYNTAX ERROR: " + ex.getMessage());
            printUsage(options);
        } catch (final Throwable ex) {
            // Display error message and stack trace on generic error
            System.err.print("EXECUTION FAILED: ");
            ex.printStackTrace();
            printUsage(options);
        }
    }

    /** Builds the set of command line options understood by {@link #main(String[])}. */
    private static Options buildOptions() {
        final Options options = new Options();
        options.addOption("u", "ks_server_url", true, "the URL of the ks server");
        options.addOption("n", "NAF_file", true, "the path to a NAF file to be processed.");
        options.addOption("d", "NAF_directory", true,
                "the path of a directory whose files are NAF files to be processed.");
        options.addOption("f", "file", true,
                "the path of a file whose content is a list of NAF paths to be processed (one for line).");
        options.addOption("r", "recursive", false,
                "process recursively the given NAF directory (in conjunction with -d)");
        options.addOption("x", "exclude", true,
                "the given layer is excluded wen populating the K. Currently only the 'Entity' layer can be provided as argument.");
        options.addOption("b", "batchsize", true,
                "the number of NAF files to be processed and submitted to the KS in a single step; -1 means all (WARNING: very memory consuming!), defaults to 1.");
        options.addOption("qs", "queueSize", true,
                "the number of batch queue items to be hold in memory; defaults to 2.");
        options.addOption("ct", "consumerThreads", true,
                "the number of consumer threads to be thrown simultaneously, 1 is default.");
        options.addOption("ksm", "ksModality", true,
                "Submitting to KS modality: (1=discard the new,Default) , (2=ignore previous content and populate), (3=delete previous content and repopulate)");
        options.addOption("v", "version", false,
                "display version and copyright information, then exit");
        options.addOption("h", "help", false, "display usage information, then exit");
        options.addOption("spi", "store_partial_info", false,
                "store in the KS even partial information in case of error (try to maximize data stored), defaults to false.");
        options.addOption("o", "outputreportfilepath", true,
                "the path of the 'report' file, where individual and overall statistics are saved");
        options.addOption("or", "outputrecordsfilepath", true,
                "the path of the 'record' file, where mentions and resources objects are saved");
        options.addOption("p", "parsingOnly", false,
                "perform NAF parsing only (do not store information in the KS)");
        options.addOption("z", "zip", true,
                "the path to a zip archive containing NAF files to be processed.");
        options.addOption("t", "tgz", true,
                "the path to a compressed tar archive (.tar.gz or .tgz) containing NAF files to be processed.");
        return options;
    }

    /**
     * Reads an integer-valued option.
     *
     * @param cmd the parsed command line
     * @param option the short option name
     * @param defaultValue the value returned when the option is absent
     * @param minValue the smallest accepted value
     * @return the parsed value, or {@code defaultValue} when the option is absent
     * @throws ParseException if the value is not an integer or is below {@code minValue}
     */
    private static int intOption(CommandLine cmd, String option, int defaultValue, int minValue)
            throws ParseException {
        if (!cmd.hasOption(option)) {
            return defaultValue;
        }
        final String raw = cmd.getOptionValue(option);
        final int value;
        try {
            value = Integer.parseInt(raw);
        } catch (NumberFormatException ex) {
            throw new ParseException(
                    "option -" + option + " expects an integer value, got '" + raw + "'");
        }
        if (value < minValue) {
            throw new ParseException(
                    "option -" + option + " must be at least " + minValue + ", got " + value);
        }
        return value;
    }

    /** Returns {@code path}, or {@code path/defaultName} when {@code path} is an existing directory. */
    private static String resolveOutputPath(String path, String defaultName) {
        return new File(path).isDirectory() ? path + "/" + defaultName : path;
    }

    /** Opens (creating or truncating) {@code path} as a buffered UTF-8 writer. */
    private static Writer openUtf8Writer(String path) throws IOException {
        return new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(new File(path)), "utf-8"));
    }

    /**
     * Resets the per-run static state to its defaults. Called at the start of
     * {@link #main(String[])} so that a previous run in the same JVM does not leak its
     * settings (in particular the input-mode flags and path) into the next one.
     */
    private static void init() {
        globalStats = new statistics();
        out = null;
        mentionFile = null;
        disabledItems = "";
        reportFileName = "report.txt";
        mentionsF = "records.txt";
        recursion = false;
        printToFile = false;
        store_partial_info = false;
        FInFile = false;
        ZInFile = false;
        TInFile = false;
        INpath = "";
        SERVER_URL = "";
        USERNAME = "";
        PASSWORD = "";
        session = null;
        store = null;
        logger = LoggerFactory.getLogger(nafPopulator.class);
    }

    /**
     * Closes the KnowledgeStore connection and the two output writers, then clears the
     * per-run static state. Each resource is closed even if closing a previous one fails.
     *
     * @throws IOException if closing one of the output writers fails
     */
    static void nullObjects() throws IOException {
        try {
            nafPopulator.closeConnection();
        } finally {
            try {
                if (nafPopulator.mentionFile != null) {
                    // Writer.close() flushes before closing.
                    nafPopulator.mentionFile.close();
                }
            } finally {
                try {
                    if (nafPopulator.out != null) {
                        nafPopulator.out.close();
                    }
                } finally {
                    globalStats = null;
                    out = null;
                    mentionFile = null;
                    batchSize = 1;
                    disabledItems = null;
                    reportFileName = null;
                    mentionsF = null;
                    recursion = false;
                    printToFile = false;
                    store_partial_info = false;
                    session = null;
                    store = null;
                }
            }
        }
    }

    /** Prints the usage line and the option descriptions to standard output. */
    private static void printUsage(Options options) {
        final int width = 80;
        final PrintWriter writer = new PrintWriter(System.out);
        final HelpFormatter formatter = new HelpFormatter();
        formatter.printUsage(writer, width, "eu.fbk.knowledgestore.populator.naf.nafPopulator",
                options);
        writer.println("\nOptions");
        formatter.printOptions(writer, width, options, 2, 2);
        writer.flush();
    }

    /**
     * Adds every counter of {@code st} to the corresponding counter of {@link #globalStats}.
     * The method performs no synchronization.
     *
     * @param st the statistics to accumulate
     */
    static void updatestats(statistics st) {
        globalStats.setObjectMention(globalStats.getObjectMention() + st.getObjectMention());
        globalStats.setPER(globalStats.getPER() + st.getPER());
        globalStats.setORG(globalStats.getORG() + st.getORG());
        globalStats.setLOC(globalStats.getLOC() + st.getLOC());
        globalStats.setFin(globalStats.getFin() + st.getFin());
        globalStats.setMix(globalStats.getMix() + st.getMix());
        globalStats.setPRO(globalStats.getPRO() + st.getPRO());
        globalStats.setNo_mapping(globalStats.getNo_mapping() + st.getNo_mapping());
        globalStats.setTimeMention(globalStats.getTimeMention() + st.getTimeMention());
        globalStats.setEventMention(globalStats.getEventMention() + st.getEventMention());
        globalStats.setParticipationMention(
                globalStats.getParticipationMention() + st.getParticipationMention());
        globalStats.setEntity(globalStats.getEntity() + st.getEntity());
        globalStats.setCoref(globalStats.getCoref() + st.getCoref());
        globalStats.setFactuality(globalStats.getFactuality() + st.getFactuality());
        globalStats.setRole(globalStats.getRole() + st.getRole());
        globalStats.setRolewithEntity(globalStats.getRolewithEntity() + st.getRolewithEntity());
        globalStats.setRolewithoutEntity(
                globalStats.getRolewithoutEntity() + st.getRolewithoutEntity());
        globalStats.setSrl(globalStats.getSrl() + st.getSrl());
        globalStats.setTimex(globalStats.getTimex() + st.getTimex());
        globalStats.setTlinkMention(globalStats.getTlinkMention() + st.getTlinkMention());
        globalStats.setTlinkMentionDiscarded(
                globalStats.getTlinkMentionDiscarded() + st.getTlinkMentionDiscarded());
        globalStats.setClinkMention(globalStats.getClinkMention() + st.getClinkMention());
        globalStats.setClinkMentionDiscarded(
                globalStats.getClinkMentionDiscarded() + st.getClinkMentionDiscarded());
        globalStats.setTlinkMentionsEnriched(
                globalStats.getTlinkMentionsEnriched() + st.getTlinkMentionsEnriched());
        globalStats.setCorefMentionEvent(
                globalStats.getCorefMentionEvent() + st.getCorefMentionEvent());
        globalStats.setCorefMentionNotEvent(
                globalStats.getCorefMentionNotEvent() + st.getCorefMentionNotEvent());
    }

    /**
     * Loads the KS connection settings from the {@code populator-ks-connection.xml} class
     * resource and opens the session through {@link #checkSession()}.
     *
     * <p>The server URL from the file is used only when none was set beforehand (the -u
     * option); username and password always come from the file. When the resource is
     * missing an error is printed on standard error and no session is opened.</p>
     *
     * @throws UnsupportedEncodingException declared for backward compatibility
     * @throws JAXBException if the connection file cannot be unmarshalled
     * @throws IOException if the connection file cannot be read
     */
    public static void readConnectionFile() throws UnsupportedEncodingException, JAXBException,
            IOException {
        final String resourceName = "populator-ks-connection.xml";
        final URL url = nafPopulator.class.getResource(resourceName);
        if (url == null) {
            System.err.println("Error: " + resourceName
                    + " file not found!\nYou should first create the connection file to the KS.");
            return;
        }

        final JAXBContext context =
                JAXBContext.newInstance("eu.fbk.knowledgestore.populator.naf.connection");
        final Unmarshaller unmarshaller = context.createUnmarshaller();
        final KnowledgestoreServer connection;
        // Close the resource stream once the file has been parsed.
        try (InputStream in = url.openStream();
                Reader reader = new InputStreamReader(in, "UTF-8")) {
            connection = (KnowledgestoreServer) unmarshaller.unmarshal(reader);
        }

        // read SERVER_URL from XML file unless previously defined with command line parameters
        if (SERVER_URL.equals("")) {
            SERVER_URL = connection.getUrl();
        }
        USERNAME = connection.getUsername();
        PASSWORD = connection.getPassword();
        checkSession();
    }

    /**
     * Ensures that a KnowledgeStore client and an open session exist, creating them when
     * needed, and then issues a test download against the server.
     *
     * <p>NOTE(review): every failure path logs an error and terminates the JVM with exit
     * status 0; the status is kept as is for backward compatibility.</p>
     */
    static void checkSession() {
        logger.info("checkSession SERVER_URL |" + SERVER_URL + "|");
        if (store == null) {
            // Initialize a KnowledgeStore client
            store = Client.builder(SERVER_URL).maxConnections(16).validateServer(false).build();
        }
        if (store != null && (session == null || session.isClosed())) {
            // Acquire a session for a given username/password pair
            session = store.newSession(USERNAME, PASSWORD);
        }
        if (store == null || session == null || session.isClosed()) {
            final String errMsg;
            if (store == null) {
                errMsg = "null store";
            } else if (session == null) {
                errMsg = "null session";
            } else {
                errMsg = "closed session";
            }
            logger.error("checkSession with SERVER_URL " + SERVER_URL + " : " + errMsg);
            System.exit(0);
        }
        try {
            session.download(new URIImpl("http://localhost/test")).exec();
        } catch (IllegalStateException e) {
            e.printStackTrace();
            logger.error("checkSession with SERVER_URL " + SERVER_URL
                    + " : IllegalStateException");
            System.exit(0);
        } catch (OperationException e) {
            e.printStackTrace();
            logger.error("checkSession with SERVER_URL " + SERVER_URL + " : OperationException");
            System.exit(0);
        }
    }

    /** Closes the session and the KnowledgeStore client if they exist and are still open. */
    static void closeConnection() {
        if (session != null && !session.isClosed()) {
            // Close the session
            session.close();
        }
        if (store != null && !store.isClosed()) {
            // Ensure to close the KS (will also close pending sessions)
            store.close();
        }
    }
}