/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.manifoldcf.agents.transformation.opennlp; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.File; import java.io.InputStream; import java.util.HashMap; import java.util.Map; import opennlp.tools.namefind.NameFinderME; import opennlp.tools.namefind.TokenNameFinderModel; import opennlp.tools.sentdetect.SentenceDetector; import opennlp.tools.sentdetect.SentenceDetectorME; import opennlp.tools.sentdetect.SentenceModel; import opennlp.tools.tokenize.Tokenizer; import opennlp.tools.tokenize.TokenizerME; import opennlp.tools.tokenize.TokenizerModel; import opennlp.tools.util.InvalidFormatException; public class OpenNlpExtractorConfig { // Specification nodes and values public static final String NODE_SMODEL_PATH = "SModelPath"; public static final String NODE_TMODEL_PATH = "TModelPath"; public static final String NODE_FINDERMODEL = "FinderModel"; public static final String ATTRIBUTE_VALUE = "value"; public static final String ATTRIBUTE_PARAMETERNAME = "parametername"; public static final String ATTRIBUTE_MODELFILE = "modelfile"; private final static Map<File,SentenceModel> sModels = new HashMap<>(); private final static Map<File,TokenizerModel> tModels = new HashMap<>(); private final static Map<File,TokenNameFinderModel> tnfModels = new HashMap<>(); protected static SentenceModel loadSModel(final File path) throws InvalidFormatException, FileNotFoundException, IOException { synchronized (sModels) { SentenceModel sd = sModels.get(path); if (sd == null) { final InputStream is = new FileInputStream(path); try { sd = new SentenceModel(is); } finally { is.close(); } sModels.put(path, sd); } return sd; } } protected static TokenizerModel loadTModel(final File path) throws InvalidFormatException, FileNotFoundException, IOException { synchronized (tModels) { TokenizerModel sd = tModels.get(path); if (sd == null) { final InputStream is = new FileInputStream(path); try { sd = new TokenizerModel(is); } finally { is.close(); } tModels.put(path, sd); } return sd; } } protected static TokenNameFinderModel loadTnfModel(final File path) throws InvalidFormatException, FileNotFoundException, IOException { synchronized (tnfModels) { TokenNameFinderModel sd = tnfModels.get(path); if (sd == null) { final InputStream is = new FileInputStream(path); try { sd = new TokenNameFinderModel(is); } finally { is.close(); } tnfModels.put(path, sd); } return sd; } } public static final SentenceDetector sentenceDetector(File path) throws InvalidFormatException, FileNotFoundException, IOException{ return new SentenceDetectorME(loadSModel(path)); } public static final Tokenizer tokenizer(File path) throws InvalidFormatException, FileNotFoundException, IOException{ return new TokenizerME(loadTModel(path)); } public static final NameFinderME finder(File path) throws InvalidFormatException, FileNotFoundException, IOException{ return new NameFinderME(loadTnfModel(path)); } }