/** * Copyright 2015, Emory University * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package edu.emory.clir.clearnlp.lexicon.dbpedia; import java.io.BufferedReader; import java.io.InputStream; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; import java.io.OutputStream; import edu.emory.clir.clearnlp.collection.tree.PrefixNode; import edu.emory.clir.clearnlp.collection.tree.PrefixTree; import edu.emory.clir.clearnlp.component.utils.NLPUtils; import edu.emory.clir.clearnlp.ner.NERInfoSet; import edu.emory.clir.clearnlp.tokenization.AbstractTokenizer; import edu.emory.clir.clearnlp.util.DSUtils; import edu.emory.clir.clearnlp.util.IOUtils; import edu.emory.clir.clearnlp.util.lang.TLanguage; /** * @since 3.0.3 * @author Jinho D. 
Choi ({@code jinho.choi@emory.edu})
 */
public class PrefixTreeExtender
{
	// Loads a serialized prefix tree, adds the entries of a plain-text list to it
	// under a given category, and serializes the extended tree back out.
	private PrefixTree<String,NERInfoSet> prefix_tree;
	private AbstractTokenizer tokenizer;

	/**
	 * Reads a serialized prefix tree from the given stream and prepares an English tokenizer.
	 * The stream is closed once the tree has been read (or reading has failed).
	 * NOTE(review): this uses Java-native deserialization, so {@code in} must come from a trusted source.
	 * @param in the stream holding the serialized prefix tree, in the format expected by
	 *        {@link IOUtils#createObjectXZBufferedInputStream(InputStream)}.
	 * @throws Exception if the stream cannot be opened or the tree cannot be deserialized.
	 */
	@SuppressWarnings("unchecked")
	public PrefixTreeExtender(InputStream in) throws Exception
	{
		// try-with-resources: the original never closed this stream, leaking the underlying handle.
		try (ObjectInputStream oin = IOUtils.createObjectXZBufferedInputStream(in))
		{
			System.out.println("Loading");
			prefix_tree = (PrefixTree<String,NERInfoSet>)oin.readObject();
		}

		tokenizer = NLPUtils.getTokenizer(TLanguage.ENGLISH);
	}

	/**
	 * Adds every non-blank line of the input to the prefix tree and tags it with the given category.
	 * Each line is trimmed and tokenized; the resulting token sequence is inserted into the tree,
	 * and {@code type} is added to the {@link NERInfoSet} stored at the node for that sequence
	 * (a new set is created when the node has no value yet).
	 * The stream is closed when this method returns, whether normally or with an exception.
	 * @param in the stream to read entries from, one entry per line.
	 * @param type the category to add to each entry.
	 * @throws Exception if reading the input fails.
	 */
	public void extend(InputStream in, String type) throws Exception
	{
		// try-with-resources: the original closed the reader only when no exception occurred.
		try (BufferedReader reader = IOUtils.createBufferedReader(in))
		{
			PrefixNode<String,NERInfoSet> node;
			NERInfoSet set;
			String[] array;
			String line;

			System.out.println("Extending");

			while ((line = reader.readLine()) != null)
			{
				line = line.trim();
				if (line.isEmpty()) continue;

				array = DSUtils.toArray(tokenizer.tokenize(line));
				node  = prefix_tree.add(array, 0, array.length, String::toString);
				set   = node.getValue();

				if (set == null)
				{
					set = new NERInfoSet();
					node.setValue(set);
				}

				set.addCategory(type);
			}

			System.out.println();
		}
	}

	/**
	 * Serializes the current prefix tree to the given stream.
	 * The stream is closed when this method returns, whether normally or with an exception.
	 * @param out the stream to write the serialized prefix tree to.
	 * @throws Exception if the stream cannot be opened or the tree cannot be written.
	 */
	public void print(OutputStream out) throws Exception
	{
		// try-with-resources: the original closed the stream only when writing succeeded.
		try (ObjectOutputStream fout = IOUtils.createObjectXZBufferedOutputStream(out))
		{
			System.out.println("Printing");
			fout.writeObject(prefix_tree);
		}
	}

	/**
	 * Command-line entry point: loads a prefix tree, extends it with an input list, and saves the result.
	 * @param args {@code prefixFile inputFile type outputFile}.
	 * @throws Exception declared for compatibility; failures are reported on standard error.
	 */
	static public void main(String[] args) throws Exception
	{
		// Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
		if (args.length < 4)
		{
			System.err.println("Usage: PrefixTreeExtender <prefixFile> <inputFile> <type> <outputFile>");
			System.exit(1);
		}

		final String prefixFile = args[0];
		final String inputFile  = args[1];
		final String type       = args[2];
		final String outputFile = args[3];

		try
		{
			PrefixTreeExtender ex = new PrefixTreeExtender(IOUtils.createFileInputStream(prefixFile));
			ex.extend(IOUtils.createFileInputStream(inputFile), type);
			ex.print(IOUtils.createFileOutputStream(outputFile));
		}
		catch (Exception e)
		{
			e.printStackTrace();
			// Signal the failure to the calling process; the original exited with status 0.
			System.exit(1);
		}
	}
}